From c2b8dea1828f35c808adcf12615893d5c740bc0a Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Fri, 2 Mar 2012 16:10:00 -0500 Subject: wmaenc: limit block_align to MAX_CODED_SUPERFRAME_SIZE This is near the theoretical limit for wma frame size and is the most that our decoder can handle. Allowing higher bit rates will just end up padding each frame with empty bytes. Fixes invalid writes for avconv when using very high bit rates. CC:libav-stable@libav.org --- libavcodec/wmaenc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index c762a723b9..e24a3f4205 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -71,8 +71,12 @@ static int encode_init(AVCodecContext * avctx){ for(i = 0; i < s->nb_block_sizes; i++) ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0); - avctx->block_align= - s->block_align= avctx->bit_rate*(int64_t)s->frame_len / (avctx->sample_rate*8); + s->block_align = avctx->bit_rate * (int64_t)s->frame_len / + (avctx->sample_rate * 8); + s->block_align = FFMIN(s->block_align, MAX_CODED_SUPERFRAME_SIZE); + avctx->block_align = s->block_align; + avctx->bit_rate = avctx->block_align * 8LL * avctx->sample_rate / + s->frame_len; //av_log(NULL, AV_LOG_ERROR, "%d %d %d %d\n", s->block_align, avctx->bit_rate, s->frame_len, avctx->sample_rate); avctx->frame_size= s->frame_len; -- cgit v1.2.3 From 1ec075cfecac01f9a289965db06f76365b0b1737 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Fri, 2 Mar 2012 16:27:57 -0500 Subject: wmaenc: limit allowed sample rate to 48kHz ff_wma_init() allows up to 50kHz, but this generates an exponent band size table that requires 65 bands. The code assumes 25 bands in many places, and using sample rates higher than 48kHz will lead to buffer overwrites. CC:libav-stable@libav.org --- libavcodec/wmaenc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'libavcodec') diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index e24a3f4205..99544c0c8a 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -39,6 +39,12 @@ static int encode_init(AVCodecContext * avctx){ return AVERROR(EINVAL); } + if (avctx->sample_rate > 48000) { + av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz", + avctx->sample_rate); + return AVERROR(EINVAL); + } + if(avctx->bit_rate < 24*1000) { av_log(avctx, AV_LOG_ERROR, "bitrate too low: got %i, need 24000 or higher\n", avctx->bit_rate); -- cgit v1.2.3 From dfc4fdedf8cfc56a505579b1f2c1c5efbce4b97e Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Fri, 2 Mar 2012 16:33:33 -0500 Subject: wmaenc: require a large enough output buffer to prevent overwrites The maximum theoretical frame size is around 17000 bytes. Although in practice it will generally be much smaller, we require a larger buffer just to be safe. CC: libav-stable@libav.org --- libavcodec/wmaenc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'libavcodec') diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index 99544c0c8a..5135b982aa 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -365,6 +365,11 @@ static int encode_superframe(AVCodecContext *avctx, } } + if (buf_size < 2 * MAX_CODED_SUPERFRAME_SIZE) { + av_log(avctx, AV_LOG_ERROR, "output buffer size is too small\n"); + return AVERROR(EINVAL); + } + #if 1 total_gain= 128; for(i=64; i; i>>=1){ -- cgit v1.2.3 From 5d652e063bd3a180f9de8915e5137aa4f938846d Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Fri, 2 Mar 2012 16:42:21 -0500 Subject: wmaenc: check final frame size against output packet size Currently we have an assert() that prevents the frame from being too large, but it is more user-friendly to give an error message instead of aborting on assert(). This condition is quite unlikely due to the minimum bit rate check in encode_init(), but it is still worth having. --- libavcodec/wmaenc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index 5135b982aa..c00f13623d 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -394,11 +394,13 @@ static int encode_superframe(AVCodecContext *avctx, } #endif - encode_frame(s, s->coefs, buf, buf_size, total_gain); + if ((i = encode_frame(s, s->coefs, buf, buf_size, total_gain)) >= 0) { + av_log(avctx, AV_LOG_ERROR, "required frame size too large. please " + "use a higher bit rate.\n"); + return AVERROR(EINVAL); + } assert((put_bits_count(&s->pb) & 7) == 0); - i= s->block_align - (put_bits_count(&s->pb)+7)/8; - assert(i>=0); - while(i--) + while (i++) put_bits(&s->pb, 8, 'N'); flush_put_bits(&s->pb); -- cgit v1.2.3 From 8ed7488ea39a4ab60045e679ad5efa6a7b81ed98 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Fri, 2 Mar 2012 16:55:45 -0500 Subject: wmaenc: return s->block_align instead of recalculating it --- libavcodec/wmaenc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'libavcodec') diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index c00f13623d..789f112aee 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -404,7 +404,7 @@ static int encode_superframe(AVCodecContext *avctx, put_bits(&s->pb, 8, 'N'); flush_put_bits(&s->pb); - return put_bits_ptr(&s->pb) - s->pb.buf; + return s->block_align; } AVCodec ff_wmav1_encoder = { -- cgit v1.2.3 From 51ddf35c9017018e58c15275ff5b129647a0c94d Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Fri, 2 Mar 2012 17:11:25 -0500 Subject: wmaenc: fix m/s stereo encoding for the first frame We need to set ms_stereo in encode_init() in order to avoid incorrectly encoding the first frame as non-m/s while flagging it as m/s. Fixes an uncomfortable pop in the left channel at the start of playback. CC:libav-stable@libav.org --- libavcodec/wmaenc.c | 4 +++- tests/ref/acodec/wmav1 | 6 +++--- tests/ref/acodec/wmav2 | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index 789f112aee..6fd3494016 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -70,6 +70,8 @@ static int encode_init(AVCodecContext * avctx){ s->use_exp_vlc = flags2 & 0x0001; s->use_bit_reservoir = flags2 & 0x0002; s->use_variable_block_len = flags2 & 0x0004; + if (avctx->channels == 2) + s->ms_stereo = 1; ff_wma_init(avctx, flags2); @@ -191,7 +193,7 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], } if (s->nb_channels == 2) { - put_bits(&s->pb, 1, s->ms_stereo= 1); + put_bits(&s->pb, 1, !!s->ms_stereo); } for(ch = 0; ch < s->nb_channels; ch++) { diff --git a/tests/ref/acodec/wmav1 b/tests/ref/acodec/wmav1 index 916e4a8ab6..117aa12a8c 100644 --- a/tests/ref/acodec/wmav1 +++ b/tests/ref/acodec/wmav1 @@ -1,4 +1,4 @@ -26a7f6b0f0b7181df8df3fa589f6bf81 *./tests/data/acodec/wmav1.asf +0260385b8a54df11ad349f9ba8240fd8 *./tests/data/acodec/wmav1.asf 106004 ./tests/data/acodec/wmav1.asf -stddev:12245.52 PSNR: 14.57 MAXDIFF:65521 bytes: 1064960/ 1058400 -stddev: 2095.89 PSNR: 29.90 MAXDIFF:27658 bytes: 1056768/ 1058400 +stddev:12241.90 PSNR: 14.57 MAXDIFF:65521 bytes: 1064960/ 1058400 +stddev: 2074.79 PSNR: 29.99 MAXDIFF:27658 bytes: 1056768/ 1058400 diff --git a/tests/ref/acodec/wmav2 b/tests/ref/acodec/wmav2 index 622b6fcc36..43b19b7530 100644 --- a/tests/ref/acodec/wmav2 +++ b/tests/ref/acodec/wmav2 @@ -1,4 +1,4 @@ -7c6c0cb692af01b312ae345723674b5f *./tests/data/acodec/wmav2.asf +bdb4c312fb109f990be83a70f8ec9bdc *./tests/data/acodec/wmav2.asf 106044 ./tests/data/acodec/wmav2.asf -stddev:12249.93 PSNR: 14.57 MAXDIFF:65521 bytes: 1064960/ 1058400 -stddev: 2089.21 PSNR: 29.93 MAXDIFF:27650 bytes: 1056768/ 1058400 +stddev:12246.35 PSNR: 14.57 MAXDIFF:65521 bytes: 1064960/ 1058400 +stddev: 2068.08 PSNR: 30.02 MAXDIFF:27650 bytes: 1056768/ 1058400 -- cgit v1.2.3 From e25be4715463da3abdb99acf735bb2148c3bd5c8 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 1 Mar 2012 21:35:22 -0800 Subject: vp8: convert idct/mc x86 assembly to use cpuflags(). --- libavcodec/x86/vp8dsp-init.c | 112 ++++++++++++++++++------------------- libavcodec/x86/vp8dsp.asm | 128 +++++++++++++++++++++++-------------------- 2 files changed, 124 insertions(+), 116 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c index 3e05bb2fb9..d3f1456b71 100644 --- a/libavcodec/x86/vp8dsp-init.c +++ b/libavcodec/x86/vp8dsp-init.c @@ -29,16 +29,16 @@ /* * MC functions */ -extern void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_h4_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_h6_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_v4_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_epel4_v6_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); @@ -80,7 +80,7 @@ extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_bilinear4_h_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, ptrdiff_t dststride, @@ -93,7 +93,7 @@ extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); -extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride, +extern void ff_put_vp8_bilinear4_v_mmx2 (uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, int mx, int my); extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, ptrdiff_t dststride, @@ -139,27 +139,27 @@ static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ } #if ARCH_X86_32 -TAP_W8 (mmxext, epel, h4) -TAP_W8 (mmxext, epel, h6) -TAP_W16(mmxext, epel, h6) -TAP_W8 (mmxext, epel, v4) -TAP_W8 (mmxext, epel, v6) -TAP_W16(mmxext, epel, v6) -TAP_W8 (mmxext, bilinear, h) -TAP_W16(mmxext, bilinear, h) -TAP_W8 (mmxext, bilinear, v) -TAP_W16(mmxext, bilinear, v) +TAP_W8 (mmx2, epel, h4) +TAP_W8 (mmx2, epel, h6) +TAP_W16(mmx2, epel, h6) +TAP_W8 (mmx2, epel, v4) +TAP_W8 (mmx2, epel, v6) +TAP_W16(mmx2, epel, v6) +TAP_W8 (mmx2, bilinear, h) +TAP_W16(mmx2, bilinear, h) +TAP_W8 (mmx2, bilinear, v) +TAP_W16(mmx2, bilinear, v) #endif -TAP_W16(sse2, epel, h6) -TAP_W16(sse2, epel, v6) -TAP_W16(sse2, bilinear, h) -TAP_W16(sse2, bilinear, v) +TAP_W16(sse2, epel, h6) +TAP_W16(sse2, epel, v6) +TAP_W16(sse2, bilinear, h) +TAP_W16(sse2, bilinear, v) -TAP_W16(ssse3, epel, h6) -TAP_W16(ssse3, epel, v6) -TAP_W16(ssse3, bilinear, h) -TAP_W16(ssse3, bilinear, v) +TAP_W16(ssse3, epel, h6) +TAP_W16(ssse3, epel, v6) +TAP_W16(ssse3, bilinear, h) +TAP_W16(ssse3, bilinear, v) #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \ static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \ @@ -177,13 +177,13 @@ static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT #if ARCH_X86_32 #define HVTAPMMX(x, y) \ -HVTAP(mmxext, 8, x, y, 4, 8) \ -HVTAP(mmxext, 8, x, y, 8, 16) +HVTAP(mmx2, 8, x, y, 4, 8) \ +HVTAP(mmx2, 8, x, y, 8, 16) -HVTAP(mmxext, 8, 6, 6, 16, 16) +HVTAP(mmx2, 8, 6, 6, 16, 16) #else #define HVTAPMMX(x, y) \ -HVTAP(mmxext, 8, x, y, 4, 8) +HVTAP(mmx2, 8, x, y, 4, 8) #endif HVTAPMMX(4, 4) @@ -218,16 +218,16 @@ static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ dst, dststride, tmp, SIZE, height, mx, my); \ } -HVBILIN(mmxext, 8, 4, 8) +HVBILIN(mmx2, 8, 4, 8) #if ARCH_X86_32 -HVBILIN(mmxext, 8, 8, 16) -HVBILIN(mmxext, 8, 16, 16) +HVBILIN(mmx2, 8, 8, 16) +HVBILIN(mmx2, 8, 16, 16) #endif -HVBILIN(sse2, 8, 8, 16) -HVBILIN(sse2, 8, 16, 16) -HVBILIN(ssse3, 8, 4, 8) -HVBILIN(ssse3, 8, 8, 16) -HVBILIN(ssse3, 8, 16, 16) +HVBILIN(sse2, 8, 8, 16) +HVBILIN(sse2, 8, 16, 16) +HVBILIN(ssse3, 8, 4, 8) +HVBILIN(ssse3, 8, 8, 16) +HVBILIN(ssse3, 8, 16, 16) extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); @@ -283,7 +283,7 @@ extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \ int e, int i, int hvt); DECLARE_LOOP_FILTER(mmx) -DECLARE_LOOP_FILTER(mmxext) +DECLARE_LOOP_FILTER(mmx2) DECLARE_LOOP_FILTER(sse2) DECLARE_LOOP_FILTER(ssse3) DECLARE_LOOP_FILTER(sse4) @@ -351,26 +351,26 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) /* note that 4-tap width=16 functions are missing because w=16 * is only used for luma, and luma is always a copy or sixtap. */ if (mm_flags & AV_CPU_FLAG_MMX2) { - VP8_MC_FUNC(2, 4, mmxext); - VP8_BILINEAR_MC_FUNC(2, 4, mmxext); + VP8_MC_FUNC(2, 4, mmx2); + VP8_BILINEAR_MC_FUNC(2, 4, mmx2); #if ARCH_X86_32 - VP8_LUMA_MC_FUNC(0, 16, mmxext); - VP8_MC_FUNC(1, 8, mmxext); - VP8_BILINEAR_MC_FUNC(0, 16, mmxext); - VP8_BILINEAR_MC_FUNC(1, 8, mmxext); - - c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; - - c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; - c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; - c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; - c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; - - c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; - c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; + VP8_LUMA_MC_FUNC(0, 16, mmx2); + VP8_MC_FUNC(1, 8, mmx2); + VP8_BILINEAR_MC_FUNC(0, 16, mmx2); + VP8_BILINEAR_MC_FUNC(1, 8, mmx2); + + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx2; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx2; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx2; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx2; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx2; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx2; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx2; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx2; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx2; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx2; #endif } diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index a7b83797ea..f21045d405 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -173,8 +173,8 @@ SECTION .text ; int height, int mx, int my); ;----------------------------------------------------------------------------- -%macro FILTER_SSSE3 3 -cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2 +%macro FILTER_SSSE3 1 +cglobal put_vp8_epel%1_h6, 6, 6, 8 lea r5d, [r5*3] mova m3, [filter_h6_shuf2] mova m4, [filter_h6_shuf3] @@ -189,7 +189,7 @@ cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2 movu m0, [r2-2] mova m1, m0 mova m2, m0 -%ifidn %1, 4 +%if mmsize == 8 ; For epel4, we need 9 bytes, but only 8 get loaded; to compensate, do the ; shuffle with a memory operand punpcklbw m0, [r2+3] @@ -215,7 +215,7 @@ cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2 jg .nextrow REP_RET -cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3 +cglobal put_vp8_epel%1_h4, 6, 6, 7 shl r5d, 4 mova m2, [pw_64] mova m3, [filter_h2_shuf] @@ -246,7 +246,7 @@ cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3 jg .nextrow REP_RET -cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2 +cglobal put_vp8_epel%1_v4, 7, 7, 8 shl r6d, 4 %ifdef PIC lea r11, [fourtap_filter_hb_m] @@ -285,7 +285,7 @@ cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2 jg .nextrow REP_RET -cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2 +cglobal put_vp8_epel%1_v6, 7, 7, 8 lea r6d, [r6*3] %ifdef PIC lea r11, [sixtap_filter_hb_m] @@ -333,13 +333,14 @@ cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2 REP_RET %endmacro -INIT_MMX -FILTER_SSSE3 4, 0, 0 -INIT_XMM -FILTER_SSSE3 8, 8, 7 +INIT_MMX ssse3 +FILTER_SSSE3 4 +INIT_XMM ssse3 +FILTER_SSSE3 8 ; 4x4 block, H-only 4-tap filter -cglobal put_vp8_epel4_h4_mmxext, 6, 6 +INIT_MMX mmx2 +cglobal put_vp8_epel4_h4, 6, 6 shl r5d, 4 %ifdef PIC lea r11, [fourtap_filter_hw_m] @@ -386,7 +387,8 @@ cglobal put_vp8_epel4_h4_mmxext, 6, 6 REP_RET ; 4x4 block, H-only 6-tap filter -cglobal put_vp8_epel4_h6_mmxext, 6, 6 +INIT_MMX mmx2 +cglobal put_vp8_epel4_h6, 6, 6 lea r5d, [r5*3] %ifdef PIC lea r11, [sixtap_filter_hw_m] @@ -442,8 +444,8 @@ cglobal put_vp8_epel4_h6_mmxext, 6, 6 jg .nextrow REP_RET -INIT_XMM -cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 +INIT_XMM sse2 +cglobal put_vp8_epel8_h4, 6, 6, 10 shl r5d, 5 %ifdef PIC lea r11, [fourtap_filter_v_m] @@ -490,7 +492,8 @@ cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 jg .nextrow REP_RET -cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 +INIT_XMM sse2 +cglobal put_vp8_epel8_h6, 6, 6, 14 lea r5d, [r5*3] shl r5d, 4 %ifdef PIC @@ -552,9 +555,9 @@ cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 jg .nextrow REP_RET -%macro FILTER_V 3 +%macro FILTER_V 1 ; 4x4 block, V-only 4-tap filter -cglobal put_vp8_epel%2_v4_%1, 7, 7, %3 +cglobal put_vp8_epel%1_v4, 7, 7, 8 shl r6d, 5 %ifdef PIC lea r11, [fourtap_filter_v_m] @@ -607,7 +610,7 @@ cglobal put_vp8_epel%2_v4_%1, 7, 7, %3 ; 4x4 block, V-only 6-tap filter -cglobal put_vp8_epel%2_v6_%1, 7, 7, %3 +cglobal put_vp8_epel%1_v6, 7, 7, 8 shl r6d, 4 lea r6, [r6*3] %ifdef PIC @@ -671,13 +674,13 @@ cglobal put_vp8_epel%2_v6_%1, 7, 7, %3 REP_RET %endmacro -INIT_MMX -FILTER_V mmxext, 4, 0 -INIT_XMM -FILTER_V sse2, 8, 8 +INIT_MMX mmx2 +FILTER_V 4 +INIT_XMM sse2 +FILTER_V 8 -%macro FILTER_BILINEAR 3 -cglobal put_vp8_bilinear%2_v_%1, 7,7,%3 +%macro FILTER_BILINEAR 1 +cglobal put_vp8_bilinear%1_v, 7, 7, 7 mov r5d, 8*16 shl r6d, 4 sub r5d, r6d @@ -705,7 +708,7 @@ cglobal put_vp8_bilinear%2_v_%1, 7,7,%3 psraw m2, 2 pavgw m0, m6 pavgw m2, m6 -%ifidn %1, mmxext +%if mmsize == 8 packuswb m0, m0 packuswb m2, m2 movh [r0+r1*0], m0 @@ -722,7 +725,7 @@ cglobal put_vp8_bilinear%2_v_%1, 7,7,%3 jg .nextrow REP_RET -cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 +cglobal put_vp8_bilinear%1_h, 7, 7, 7 mov r6d, 8*16 shl r5d, 4 sub r6d, r5d @@ -751,7 +754,7 @@ cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 psraw m2, 2 pavgw m0, m6 pavgw m2, m6 -%ifidn %1, mmxext +%if mmsize == 8 packuswb m0, m0 packuswb m2, m2 movh [r0+r1*0], m0 @@ -769,13 +772,13 @@ cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 REP_RET %endmacro -INIT_MMX -FILTER_BILINEAR mmxext, 4, 0 -INIT_XMM -FILTER_BILINEAR sse2, 8, 7 +INIT_MMX mmx2 +FILTER_BILINEAR 4 +INIT_XMM sse2 +FILTER_BILINEAR 8 %macro FILTER_BILINEAR_SSSE3 1 -cglobal put_vp8_bilinear%1_v_ssse3, 7,7 +cglobal put_vp8_bilinear%1_v, 7, 7, 5 shl r6d, 4 %ifdef PIC lea r11, [bilinear_filter_vb_m] @@ -811,7 +814,7 @@ cglobal put_vp8_bilinear%1_v_ssse3, 7,7 jg .nextrow REP_RET -cglobal put_vp8_bilinear%1_h_ssse3, 7,7 +cglobal put_vp8_bilinear%1_h, 7, 7, 5 shl r5d, 4 %ifdef PIC lea r11, [bilinear_filter_vb_m] @@ -848,12 +851,13 @@ cglobal put_vp8_bilinear%1_h_ssse3, 7,7 REP_RET %endmacro -INIT_MMX +INIT_MMX ssse3 FILTER_BILINEAR_SSSE3 4 -INIT_XMM +INIT_XMM ssse3 FILTER_BILINEAR_SSSE3 8 -cglobal put_vp8_pixels8_mmx, 5,5 +INIT_MMX mmx +cglobal put_vp8_pixels8, 5,5 .nextrow: movq mm0, [r2+r3*0] movq mm1, [r2+r3*1] @@ -866,7 +870,8 @@ cglobal put_vp8_pixels8_mmx, 5,5 REP_RET %if ARCH_X86_32 -cglobal put_vp8_pixels16_mmx, 5,5 +INIT_MMX mmx +cglobal put_vp8_pixels16, 5,5 .nextrow: movq mm0, [r2+r3*0+0] movq mm1, [r2+r3*0+8] @@ -883,7 +888,8 @@ cglobal put_vp8_pixels16_mmx, 5,5 REP_RET %endif -cglobal put_vp8_pixels16_sse, 5,5,2 +INIT_XMM sse +cglobal put_vp8_pixels16, 5,5,2 .nextrow: movups xmm0, [r2+r3*0] movups xmm1, [r2+r3*1] @@ -918,8 +924,8 @@ cglobal put_vp8_pixels16_sse, 5,5,2 %4 [r1+r2+%3], m5 %endmacro -INIT_MMX -cglobal vp8_idct_dc_add_mmx, 3, 3 +INIT_MMX mmx +cglobal vp8_idct_dc_add, 3, 3 ; load data movd m0, [r1] @@ -941,8 +947,8 @@ cglobal vp8_idct_dc_add_mmx, 3, 3 ADD_DC m0, m1, 0, movh RET -INIT_XMM -cglobal vp8_idct_dc_add_sse4, 3, 3, 6 +INIT_XMM sse4 +cglobal vp8_idct_dc_add, 3, 3, 6 ; load data movd m0, [r1] pxor m1, m1 @@ -976,8 +982,8 @@ cglobal vp8_idct_dc_add_sse4, 3, 3, 6 ;----------------------------------------------------------------------------- %if ARCH_X86_32 -INIT_MMX -cglobal vp8_idct_dc_add4y_mmx, 3, 3 +INIT_MMX mmx +cglobal vp8_idct_dc_add4y, 3, 3 ; load data movd m0, [r1+32*0] ; A movd m1, [r1+32*2] ; C @@ -1012,8 +1018,8 @@ cglobal vp8_idct_dc_add4y_mmx, 3, 3 RET %endif -INIT_XMM -cglobal vp8_idct_dc_add4y_sse2, 3, 3, 6 +INIT_XMM sse2 +cglobal vp8_idct_dc_add4y, 3, 3, 6 ; load data movd m0, [r1+32*0] ; A movd m1, [r1+32*2] ; C @@ -1046,8 +1052,8 @@ cglobal vp8_idct_dc_add4y_sse2, 3, 3, 6 ; void vp8_idct_dc_add4uv_(uint8_t *dst, DCTELEM block[4][16], int stride); ;----------------------------------------------------------------------------- -INIT_MMX -cglobal vp8_idct_dc_add4uv_mmx, 3, 3 +INIT_MMX mmx +cglobal vp8_idct_dc_add4uv, 3, 3 ; load data movd m0, [r1+32*0] ; A movd m1, [r1+32*2] ; C @@ -1118,9 +1124,8 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3 SWAP %4, %3 %endmacro -INIT_MMX -%macro VP8_IDCT_ADD 1 -cglobal vp8_idct_add_%1, 3, 3 +%macro VP8_IDCT_ADD 0 +cglobal vp8_idct_add, 3, 3 ; load block data movq m0, [r1+ 0] movq m1, [r1+ 8] @@ -1128,7 +1133,7 @@ cglobal vp8_idct_add_%1, 3, 3 movq m3, [r1+24] movq m6, [pw_20091] movq m7, [pw_17734] -%ifidn %1, sse +%if cpuflag(sse) xorps xmm0, xmm0 movaps [r1+ 0], xmm0 movaps [r1+16], xmm0 @@ -1157,9 +1162,11 @@ cglobal vp8_idct_add_%1, 3, 3 %endmacro %if ARCH_X86_32 -VP8_IDCT_ADD mmx +INIT_MMX mmx +VP8_IDCT_ADD %endif -VP8_IDCT_ADD sse +INIT_MMX sse +VP8_IDCT_ADD ;----------------------------------------------------------------------------- ; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16]) @@ -1192,13 +1199,13 @@ VP8_IDCT_ADD sse SWAP %1, %4, %3 %endmacro -%macro VP8_DC_WHT 1 -cglobal vp8_luma_dc_wht_%1, 2,3 +%macro VP8_DC_WHT 0 +cglobal vp8_luma_dc_wht, 2, 3 movq m0, [r1] movq m1, [r1+8] movq m2, [r1+16] movq m3, [r1+24] -%ifidn %1, sse +%if cpuflag(sse) xorps xmm0, xmm0 movaps [r1+ 0], xmm0 movaps [r1+16], xmm0 @@ -1222,11 +1229,12 @@ cglobal vp8_luma_dc_wht_%1, 2,3 RET %endmacro -INIT_MMX %if ARCH_X86_32 -VP8_DC_WHT mmx +INIT_MMX mmx +VP8_DC_WHT %endif -VP8_DC_WHT sse +INIT_MMX sse +VP8_DC_WHT ;----------------------------------------------------------------------------- ; void vp8_h/v_loop_filter_simple_(uint8_t *dst, int stride, int flim); -- cgit v1.2.3 From 28170f1a39236c5be91ab6df67e477a213c552b4 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 2 Mar 2012 20:38:02 -0800 Subject: vp8: convert loopfilter x86 assembly to use cpuflags(). --- libavcodec/x86/vp8dsp.asm | 359 ++++++++++++++++++++-------------------------- 1 file changed, 158 insertions(+), 201 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index f21045d405..4dba6db3b7 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -1422,7 +1422,17 @@ VP8_DC_WHT add %4, %5 %endmacro -%macro WRITE_8W_SSE2 5 +%macro WRITE_8W 5 +%if cpuflag(sse4) + pextrw [%3+%4*4], %1, 0 + pextrw [%2+%4*4], %1, 1 + pextrw [%3+%4*2], %1, 2 + pextrw [%3+%4 ], %1, 3 + pextrw [%3 ], %1, 4 + pextrw [%2 ], %1, 5 + pextrw [%2+%5 ], %1, 6 + pextrw [%2+%5*2], %1, 7 +%else movd %2d, %1 psrldq %1, 4 mov [%3+%4*4], %2w @@ -1448,67 +1458,51 @@ VP8_DC_WHT mov [%3+%5 ], %2w shr %2, 16 mov [%3+%5*2], %2w +%endif %endmacro -%macro WRITE_8W_SSE4 5 - pextrw [%3+%4*4], %1, 0 - pextrw [%2+%4*4], %1, 1 - pextrw [%3+%4*2], %1, 2 - pextrw [%3+%4 ], %1, 3 - pextrw [%3 ], %1, 4 - pextrw [%2 ], %1, 5 - pextrw [%2+%5 ], %1, 6 - pextrw [%2+%5*2], %1, 7 -%endmacro - -%macro SPLATB_REG_MMX 2-3 +%macro SPLATB_REG 2-3 +%if cpuflag(ssse3) + movd %1, %2d + pshufb %1, %3 +%elif cpuflag(sse2) movd %1, %2d punpcklbw %1, %1 - punpcklwd %1, %1 - punpckldq %1, %1 -%endmacro - -%macro SPLATB_REG_MMXEXT 2-3 + pshuflw %1, %1, 0x0 + punpcklqdq %1, %1 +%elif cpuflag(mmx2) movd %1, %2d punpcklbw %1, %1 pshufw %1, %1, 0x0 -%endmacro - -%macro SPLATB_REG_SSE2 2-3 +%else movd %1, %2d punpcklbw %1, %1 - pshuflw %1, %1, 0x0 - punpcklqdq %1, %1 -%endmacro - -%macro SPLATB_REG_SSSE3 3 - movd %1, %2d - pshufb %1, %3 + punpcklwd %1, %1 + punpckldq %1, %1 +%endif %endmacro -%macro SIMPLE_LOOPFILTER 4 -cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4 +%macro SIMPLE_LOOPFILTER 2 +cglobal vp8_%1_loop_filter_simple, 3, %2, 8 %if mmsize == 8 ; mmx/mmxext mov r3, 2 %endif -%ifnidn %1, sse2 -%if mmsize == 16 +%if cpuflag(ssse3) pxor m0, m0 -%endif %endif SPLATB_REG m7, r2, m0 ; splat "flim" into register ; set up indexes to address 4 rows mov r2, r1 neg r1 -%ifidn %2, h +%ifidn %1, h lea r0, [r0+4*r2-2] %endif %if mmsize == 8 ; mmx / mmxext .next8px %endif -%ifidn %2, v +%ifidn %1, v ; read 4 half/full rows of pixels mova m0, [r0+r1*2] ; p1 mova m1, [r0+r1] ; p0 @@ -1589,7 +1583,7 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4 psubusb m6, m3 ; p0+f2 ; store -%ifidn %2, v +%ifidn %1, v mova [r0], m4 mova [r0+r1], m6 %else ; h @@ -1597,12 +1591,12 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4 SBUTTERFLY bw, 6, 4, 0 %if mmsize == 16 ; sse2 -%ifidn %1, sse4 +%if cpuflag(sse4) inc r4 %endif WRITE_8W m6, r4, r0, r1, r2 lea r4, [r3+r1+1] -%ifidn %1, sse4 +%if cpuflag(sse4) inc r3 %endif WRITE_8W m4, r3, r4, r1, r2 @@ -1613,7 +1607,7 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4 %if mmsize == 8 ; mmx/mmxext ; next 8 pixels -%ifidn %2, v +%ifidn %1, v add r0, 8 ; advance 8 cols = pixels %else ; h lea r0, [r0+r2*8-1] ; advance 8 rows = lines @@ -1627,41 +1621,38 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4 %endmacro %if ARCH_X86_32 -INIT_MMX -%define SPLATB_REG SPLATB_REG_MMX -SIMPLE_LOOPFILTER mmx, v, 4, 0 -SIMPLE_LOOPFILTER mmx, h, 5, 0 -%define SPLATB_REG SPLATB_REG_MMXEXT -SIMPLE_LOOPFILTER mmxext, v, 4, 0 -SIMPLE_LOOPFILTER mmxext, h, 5, 0 -%endif - -INIT_XMM -%define SPLATB_REG SPLATB_REG_SSE2 -%define WRITE_8W WRITE_8W_SSE2 -SIMPLE_LOOPFILTER sse2, v, 3, 8 -SIMPLE_LOOPFILTER sse2, h, 5, 8 -%define SPLATB_REG SPLATB_REG_SSSE3 -SIMPLE_LOOPFILTER ssse3, v, 3, 8 -SIMPLE_LOOPFILTER ssse3, h, 5, 8 -%define WRITE_8W WRITE_8W_SSE4 -SIMPLE_LOOPFILTER sse4, h, 5, 8 +INIT_MMX mmx +SIMPLE_LOOPFILTER v, 4 +SIMPLE_LOOPFILTER h, 5 +INIT_MMX mmx2 +SIMPLE_LOOPFILTER v, 4 +SIMPLE_LOOPFILTER h, 5 +%endif + +INIT_XMM sse2 +SIMPLE_LOOPFILTER v, 3 +SIMPLE_LOOPFILTER h, 5 +INIT_XMM ssse3 +SIMPLE_LOOPFILTER v, 3 +SIMPLE_LOOPFILTER h, 5 +INIT_XMM sse4 +SIMPLE_LOOPFILTER h, 5 ;----------------------------------------------------------------------------- ; void vp8_h/v_loop_filter_inner_(uint8_t *dst, [uint8_t *v,] int stride, ; int flimE, int flimI, int hev_thr); ;----------------------------------------------------------------------------- -%macro INNER_LOOPFILTER 5 -%if %4 == 8 ; chroma -cglobal vp8_%2_loop_filter8uv_inner_%1, 6, %3, %5 +%macro INNER_LOOPFILTER 3 +%if %3 == 8 ; chroma +cglobal vp8_%1_loop_filter8uv_inner, 6, %2, 13 %define dst8_reg r1 %define mstride_reg r2 %define E_reg r3 %define I_reg r4 %define hev_thr_reg r5 %else ; luma -cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 +cglobal vp8_%1_loop_filter16y_inner, 5, %2, 13 %define mstride_reg r1 %define E_reg r2 %define I_reg r3 @@ -1681,11 +1672,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %define stack_reg hev_thr_reg %endif -%ifnidn %1, sse2 -%if mmsize == 16 +%if cpuflag(ssse3) pxor m7, m7 %endif -%endif %ifndef m8 ; mmx/mmxext or sse2 on x86-32 ; splat function arguments @@ -1696,7 +1685,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 ; align stack mov stack_reg, rsp ; backup stack pointer and rsp, ~(mmsize-1) ; align stack -%ifidn %2, v +%ifidn %1, v sub rsp, mmsize * 4 ; stack layout: [0]=E, [1]=I, [2]=hev_thr ; [3]=hev() result %else ; h @@ -1729,14 +1718,14 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh %endif -%if mmsize == 8 && %4 == 16 ; mmx/mmxext +%if mmsize == 8 && %3 == 16 ; mmx/mmxext mov cnt_reg, 2 %endif mov stride_reg, mstride_reg neg mstride_reg -%ifidn %2, h +%ifidn %1, h lea dst_reg, [dst_reg + stride_reg*4-4] -%if %4 == 8 +%if %3 == 8 lea dst8_reg, [dst8_reg+ stride_reg*4-4] %endif %endif @@ -1746,8 +1735,8 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %endif ; read lea dst2_reg, [dst_reg + stride_reg] -%ifidn %2, v -%if %4 == 8 && mmsize == 16 +%ifidn %1, v +%if %3 == 8 && mmsize == 16 %define movrow movh %else %define movrow mova @@ -1758,7 +1747,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 movrow m5, [dst2_reg] ; q1 movrow m6, [dst2_reg+ stride_reg] ; q2 movrow m7, [dst2_reg+ stride_reg*2] ; q3 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 movhps m0, [dst8_reg+mstride_reg*4] movhps m2, [dst8_reg+mstride_reg*2] add dst8_reg, stride_reg @@ -1795,7 +1784,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 SWAP 6, 3 SWAP 5, 3 %else ; sse2 (h) -%if %4 == 16 +%if %3 == 16 lea dst8_reg, [dst_reg + stride_reg*8] %endif @@ -1882,7 +1871,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 psubusb m6, m5 ; q2-q1 por m6, m4 ; abs(q2-q1) -%ifidn %1, mmx +%if notcpuflag(mmx2) mova m4, flim_I pxor m3, m3 psubusb m0, m4 @@ -1904,9 +1893,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 ; normal_limit and high_edge_variance for p1-p0, q1-q0 SWAP 7, 3 ; now m7 is zero -%ifidn %2, v +%ifidn %1, v movrow m3, [dst_reg +mstride_reg] ; p0 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 movhps m3, [dst8_reg+mstride_reg] %endif %elifdef m12 @@ -1922,7 +1911,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 psubusb m1, m3 ; p1-p0 psubusb m6, m2 ; p0-p1 por m1, m6 ; abs(p1-p0) -%ifidn %1, mmx +%if notcpuflag(mmx2) mova m6, m1 psubusb m1, m4 psubusb m6, hev_thr @@ -1936,9 +1925,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %endif SWAP 6, 4 ; now m6 is I -%ifidn %2, v +%ifidn %1, v movrow m4, [dst_reg] ; q0 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 movhps m4, [dst8_reg] %endif %elifdef m8 @@ -1953,7 +1942,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 psubusb m1, m5 ; q0-q1 psubusb m7, m4 ; q1-q0 por m1, m7 ; abs(q1-q0) -%ifidn %1, mmx +%if notcpuflag(mmx2) mova m7, m1 psubusb m1, m6 psubusb m7, hev_thr @@ -2061,14 +2050,14 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %else mova m6, mask_res %endif -%ifidn %1, mmx +%if notcpuflag(mmx2) mova m7, [pb_1] %else ; mmxext/sse2 pxor m7, m7 %endif pand m0, m6 pand m1, m6 -%ifidn %1, mmx +%if notcpuflag(mmx2) paddusb m0, m7 pand m1, [pb_FE] pandn m7, m0 @@ -2086,12 +2075,12 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 paddusb m2, m0 ; p1+a ; store -%ifidn %2, v +%ifidn %1, v movrow [dst_reg +mstride_reg*2], m2 movrow [dst_reg +mstride_reg ], m3 movrow [dst_reg], m4 movrow [dst_reg + stride_reg ], m5 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 movhps [dst8_reg+mstride_reg*2], m2 movhps [dst8_reg+mstride_reg ], m3 movhps [dst8_reg], m4 @@ -2108,20 +2097,20 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 WRITE_4x2D 2, 3, 4, 5, dst_reg, dst2_reg, mstride_reg, stride_reg %else ; sse2 (h) lea dst8_reg, [dst8_reg+mstride_reg+2] - WRITE_4x4D 2, 3, 4, 5, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %4 + WRITE_4x4D 2, 3, 4, 5, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %3 %endif %endif %if mmsize == 8 -%if %4 == 8 ; chroma -%ifidn %2, h +%if %3 == 8 ; chroma +%ifidn %1, h sub dst_reg, 2 %endif cmp dst_reg, dst8_reg mov dst_reg, dst8_reg jnz .next8px %else -%ifidn %2, h +%ifidn %1, h lea dst_reg, [dst_reg + stride_reg*8-2] %else ; v add dst_reg, 8 @@ -2138,56 +2127,46 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %endmacro %if ARCH_X86_32 -INIT_MMX -%define SPLATB_REG SPLATB_REG_MMX -INNER_LOOPFILTER mmx, v, 6, 16, 0 -INNER_LOOPFILTER mmx, h, 6, 16, 0 -INNER_LOOPFILTER mmx, v, 6, 8, 0 -INNER_LOOPFILTER mmx, h, 6, 8, 0 - -%define SPLATB_REG SPLATB_REG_MMXEXT -INNER_LOOPFILTER mmxext, v, 6, 16, 0 -INNER_LOOPFILTER mmxext, h, 6, 16, 0 -INNER_LOOPFILTER mmxext, v, 6, 8, 0 -INNER_LOOPFILTER mmxext, h, 6, 8, 0 -%endif - -INIT_XMM -%define SPLATB_REG SPLATB_REG_SSE2 -INNER_LOOPFILTER sse2, v, 5, 16, 13 -%ifdef m8 -INNER_LOOPFILTER sse2, h, 5, 16, 13 -%else -INNER_LOOPFILTER sse2, h, 6, 16, 13 -%endif -INNER_LOOPFILTER sse2, v, 6, 8, 13 -INNER_LOOPFILTER sse2, h, 6, 8, 13 +INIT_MMX mmx +INNER_LOOPFILTER v, 6, 16 +INNER_LOOPFILTER h, 6, 16 +INNER_LOOPFILTER v, 6, 8 +INNER_LOOPFILTER h, 6, 8 -%define SPLATB_REG SPLATB_REG_SSSE3 -INNER_LOOPFILTER ssse3, v, 5, 16, 13 -%ifdef m8 -INNER_LOOPFILTER ssse3, h, 5, 16, 13 -%else -INNER_LOOPFILTER ssse3, h, 6, 16, 13 +INIT_MMX mmx2 +INNER_LOOPFILTER v, 6, 16 +INNER_LOOPFILTER h, 6, 16 +INNER_LOOPFILTER v, 6, 8 +INNER_LOOPFILTER h, 6, 8 %endif -INNER_LOOPFILTER ssse3, v, 6, 8, 13 -INNER_LOOPFILTER ssse3, h, 6, 8, 13 + +INIT_XMM sse2 +INNER_LOOPFILTER v, 5, 16 +INNER_LOOPFILTER h, 5 + ARCH_X86_32, 16 +INNER_LOOPFILTER v, 6, 8 +INNER_LOOPFILTER h, 6, 8 + +INIT_XMM ssse3 +INNER_LOOPFILTER v, 5, 16 +INNER_LOOPFILTER h, 5 + ARCH_X86_32, 16 +INNER_LOOPFILTER v, 6, 8 +INNER_LOOPFILTER h, 6, 8 ;----------------------------------------------------------------------------- ; void vp8_h/v_loop_filter_mbedge_(uint8_t *dst, [uint8_t *v,] int stride, ; int flimE, int flimI, int hev_thr); ;----------------------------------------------------------------------------- -%macro MBEDGE_LOOPFILTER 5 -%if %4 == 8 ; chroma -cglobal vp8_%2_loop_filter8uv_mbedge_%1, 6, %3, %5 +%macro MBEDGE_LOOPFILTER 3 +%if %3 == 8 ; chroma +cglobal vp8_%1_loop_filter8uv_mbedge, 6, %2, 15 %define dst8_reg r1 %define mstride_reg r2 %define E_reg r3 %define I_reg r4 %define hev_thr_reg r5 %else ; luma -cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 +cglobal vp8_%1_loop_filter16y_mbedge, 5, %2, 15 %define mstride_reg r1 %define E_reg r2 %define I_reg r3 @@ -2207,14 +2186,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define stack_reg hev_thr_reg %endif -%define ssse3_or_higher 0 -%ifnidn %1, sse2 -%if mmsize == 16 -%define ssse3_or_higher 1 -%endif -%endif - -%if ssse3_or_higher +%if cpuflag(ssse3) pxor m7, m7 %endif @@ -2275,14 +2247,14 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh %endif -%if mmsize == 8 && %4 == 16 ; mmx/mmxext +%if mmsize == 8 && %3 == 16 ; mmx/mmxext mov cnt_reg, 2 %endif mov stride_reg, mstride_reg neg mstride_reg -%ifidn %2, h +%ifidn %1, h lea dst_reg, [dst_reg + stride_reg*4-4] -%if %4 == 8 +%if %3 == 8 lea dst8_reg, [dst8_reg+ stride_reg*4-4] %endif %endif @@ -2292,8 +2264,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %endif ; read lea dst2_reg, [dst_reg + stride_reg] -%ifidn %2, v -%if %4 == 8 && mmsize == 16 +%ifidn %1, v +%if %3 == 8 && mmsize == 16 %define movrow movh %else %define movrow mova @@ -2304,7 +2276,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 movrow m5, [dst2_reg] ; q1 movrow m6, [dst2_reg+ stride_reg] ; q2 movrow m7, [dst2_reg+ stride_reg*2] ; q3 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 movhps m0, [dst8_reg+mstride_reg*4] movhps m2, [dst8_reg+mstride_reg*2] add dst8_reg, stride_reg @@ -2341,7 +2313,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 SWAP 6, 3 SWAP 5, 3 %else ; sse2 (h) -%if %4 == 16 +%if %3 == 16 lea dst8_reg, [dst_reg + stride_reg*8] %endif @@ -2430,7 +2402,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubusb m6, m5 ; q2-q1 por m6, m4 ; abs(q2-q1) -%ifidn %1, mmx +%if notcpuflag(mmx2) mova m4, flim_I pxor m3, m3 psubusb m0, m4 @@ -2452,9 +2424,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 ; normal_limit and high_edge_variance for p1-p0, q1-q0 SWAP 7, 3 ; now m7 is zero -%ifidn %2, v +%ifidn %1, v movrow m3, [dst_reg +mstride_reg] ; p0 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 movhps m3, [dst8_reg+mstride_reg] %endif %elifdef m12 @@ -2470,7 +2442,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubusb m1, m3 ; p1-p0 psubusb m6, m2 ; p0-p1 por m1, m6 ; abs(p1-p0) -%ifidn %1, mmx +%if notcpuflag(mmx2) mova m6, m1 psubusb m1, m4 psubusb m6, hev_thr @@ -2484,9 +2456,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %endif SWAP 6, 4 ; now m6 is I -%ifidn %2, v +%ifidn %1, v movrow m4, [dst_reg] ; q0 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 movhps m4, [dst8_reg] %endif %elifdef m8 @@ -2501,7 +2473,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubusb m1, m5 ; q0-q1 psubusb m7, m4 ; q1-q0 por m1, m7 ; abs(q1-q0) -%ifidn %1, mmx +%if notcpuflag(mmx2) mova m7, m1 psubusb m1, m6 psubusb m7, hev_thr @@ -2613,7 +2585,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 paddusb m4, m1 ; q0-f1 ; filter_mbedge (m2-m5 = p1-q1; lim_res carries w) -%if ssse3_or_higher +%if cpuflag(ssse3) mova m7, [pb_1] %else mova m7, [pw_63] @@ -2626,7 +2598,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 pxor m0, m0 mova m6, m1 pcmpgtb m0, m1 ; which are negative -%if ssse3_or_higher +%if cpuflag(ssse3) punpcklbw m6, m7 ; interleave with "1" for rounding punpckhbw m1, m7 %else @@ -2634,7 +2606,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 punpckhbw m1, m0 %endif mova lim_sign, m0 -%if ssse3_or_higher +%if cpuflag(ssse3) mova m7, [pb_27_63] %ifndef m8 mova lim_res, m1 @@ -2667,7 +2639,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubb m1, m6 pand m1, m0 ; -a0 pandn m0, m6 ; +a0 -%if ssse3_or_higher +%if cpuflag(ssse3) mova m6, [pb_18_63] ; pipelining %endif psubusb m3, m1 @@ -2675,7 +2647,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 paddusb m3, m0 ; p0+a0 psubusb m4, m0 ; q0-a0 -%if ssse3_or_higher +%if cpuflag(ssse3) SWAP 6, 7 %ifdef m10 SWAP 1, 10 @@ -2707,7 +2679,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubb m1, m6 pand m1, m0 ; -a1 pandn m0, m6 ; +a1 -%if ssse3_or_higher +%if cpuflag(ssse3) mova m6, [pb_9_63] %endif psubusb m2, m1 @@ -2715,7 +2687,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 paddusb m2, m0 ; p1+a1 psubusb m5, m0 ; q1-a1 -%if ssse3_or_higher +%if cpuflag(ssse3) SWAP 6, 7 %ifdef m10 SWAP 1, 10 @@ -2765,14 +2737,14 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubusb m6, m7 ; q1-a1 ; store -%ifidn %2, v +%ifidn %1, v movrow [dst2_reg+mstride_reg*4], m1 movrow [dst_reg +mstride_reg*2], m2 movrow [dst_reg +mstride_reg ], m3 movrow [dst_reg], m4 movrow [dst2_reg], m5 movrow [dst2_reg+ stride_reg ], m6 -%if mmsize == 16 && %4 == 8 +%if mmsize == 16 && %3 == 8 add dst8_reg, mstride_reg movhps [dst8_reg+mstride_reg*2], m1 movhps [dst8_reg+mstride_reg ], m2 @@ -2796,14 +2768,14 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 WRITE_2x4W m5, m6, dst2_reg, dst_reg, mstride_reg, stride_reg %else ; sse2 (h) lea dst8_reg, [dst8_reg+mstride_reg+1] - WRITE_4x4D 1, 2, 3, 4, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %4 + WRITE_4x4D 1, 2, 3, 4, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %3 lea dst_reg, [dst2_reg+mstride_reg+4] lea dst8_reg, [dst8_reg+mstride_reg+4] -%ifidn %1, sse4 +%if cpuflag(sse4) add dst2_reg, 4 %endif WRITE_8W m5, dst2_reg, dst_reg, mstride_reg, stride_reg -%ifidn %1, sse4 +%if cpuflag(sse4) lea dst2_reg, [dst8_reg+ stride_reg] %endif WRITE_8W m6, dst2_reg, dst8_reg, mstride_reg, stride_reg @@ -2811,15 +2783,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %endif %if mmsize == 8 -%if %4 == 8 ; chroma -%ifidn %2, h +%if %3 == 8 ; chroma +%ifidn %1, h sub dst_reg, 5 %endif cmp dst_reg, dst8_reg mov dst_reg, dst8_reg jnz .next8px %else -%ifidn %2, h +%ifidn %1, h lea dst_reg, [dst_reg + stride_reg*8-5] %else ; v add dst_reg, 8 @@ -2836,46 +2808,31 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %endmacro %if ARCH_X86_32 -INIT_MMX -%define SPLATB_REG SPLATB_REG_MMX -MBEDGE_LOOPFILTER mmx, v, 6, 16, 0 -MBEDGE_LOOPFILTER mmx, h, 6, 16, 0 -MBEDGE_LOOPFILTER mmx, v, 6, 8, 0 -MBEDGE_LOOPFILTER mmx, h, 6, 8, 0 - -%define SPLATB_REG SPLATB_REG_MMXEXT -MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0 -MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0 -MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0 -MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 -%endif - -INIT_XMM -%define SPLATB_REG SPLATB_REG_SSE2 -%define WRITE_8W WRITE_8W_SSE2 -MBEDGE_LOOPFILTER sse2, v, 5, 16, 15 -%ifdef m8 -MBEDGE_LOOPFILTER sse2, h, 5, 16, 15 -%else -MBEDGE_LOOPFILTER sse2, h, 6, 16, 15 -%endif -MBEDGE_LOOPFILTER sse2, v, 6, 8, 15 -MBEDGE_LOOPFILTER sse2, h, 6, 8, 15 +INIT_MMX mmx +MBEDGE_LOOPFILTER v, 6, 16 +MBEDGE_LOOPFILTER h, 6, 16 +MBEDGE_LOOPFILTER v, 6, 8 +MBEDGE_LOOPFILTER h, 6, 8 -%define SPLATB_REG SPLATB_REG_SSSE3 -MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15 -%ifdef m8 -MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15 -%else -MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15 +INIT_MMX mmx2 +MBEDGE_LOOPFILTER v, 6, 16 +MBEDGE_LOOPFILTER h, 6, 16 +MBEDGE_LOOPFILTER v, 6, 8 +MBEDGE_LOOPFILTER h, 6, 8 %endif -MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15 -MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15 -%define WRITE_8W WRITE_8W_SSE4 -%ifdef m8 -MBEDGE_LOOPFILTER sse4, h, 5, 16, 15 -%else -MBEDGE_LOOPFILTER sse4, h, 6, 16, 15 -%endif -MBEDGE_LOOPFILTER sse4, h, 6, 8, 15 +INIT_XMM sse2 +MBEDGE_LOOPFILTER v, 5, 16 +MBEDGE_LOOPFILTER h, 5 + ARCH_X86_32, 16 +MBEDGE_LOOPFILTER v, 6, 8 +MBEDGE_LOOPFILTER h, 6, 8 + +INIT_XMM ssse3 +MBEDGE_LOOPFILTER v, 5, 16 +MBEDGE_LOOPFILTER h, 5 + ARCH_X86_32, 16 +MBEDGE_LOOPFILTER v, 6, 8 +MBEDGE_LOOPFILTER h, 6, 8 + +INIT_XMM sse4 +MBEDGE_LOOPFILTER h, 5 + ARCH_X86_32, 16 +MBEDGE_LOOPFILTER h, 6, 8 -- cgit v1.2.3 From 21ffc78fd75624626be297cee515b1f11a755ccf Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 3 Mar 2012 06:46:29 -0800 Subject: vp8: convert mc x86 assembly to use named arguments. --- libavcodec/x86/vp8dsp.asm | 544 +++++++++++++++++++++++----------------------- 1 file changed, 272 insertions(+), 272 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 4dba6db3b7..2f45f5a478 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -116,23 +116,25 @@ bilinear_filter_vb_m: times 8 db 7, 1 times 8 db 1, 7 %ifdef PIC -%define fourtap_filter_hw r11 -%define sixtap_filter_hw r11 -%define fourtap_filter_hb r11 -%define sixtap_filter_hb r11 -%define fourtap_filter_v r11 -%define sixtap_filter_v r11 -%define bilinear_filter_vw r11 -%define bilinear_filter_vb r11 +%define fourtap_filter_hw picregq +%define sixtap_filter_hw picregq +%define fourtap_filter_hb picregq +%define sixtap_filter_hb picregq +%define fourtap_filter_v picregq +%define sixtap_filter_v picregq +%define bilinear_filter_vw picregq +%define bilinear_filter_vb picregq +%define npicregs 1 %else -%define fourtap_filter_hw fourtap_filter_hw_m -%define sixtap_filter_hw sixtap_filter_hw_m -%define fourtap_filter_hb fourtap_filter_hb_m -%define sixtap_filter_hb sixtap_filter_hb_m -%define fourtap_filter_v fourtap_filter_v_m -%define sixtap_filter_v sixtap_filter_v_m +%define fourtap_filter_hw fourtap_filter_hw_m +%define sixtap_filter_hw sixtap_filter_hw_m +%define fourtap_filter_hb fourtap_filter_hb_m +%define sixtap_filter_hb sixtap_filter_hb_m +%define fourtap_filter_v fourtap_filter_v_m +%define sixtap_filter_v sixtap_filter_v_m %define bilinear_filter_vw bilinear_filter_vw_m %define bilinear_filter_vb bilinear_filter_vb_m +%define npicregs 0 %endif filter_h2_shuf: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 @@ -174,25 +176,25 @@ SECTION .text ;----------------------------------------------------------------------------- %macro FILTER_SSSE3 1 -cglobal put_vp8_epel%1_h6, 6, 6, 8 - lea r5d, [r5*3] +cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, height, mx, picreg + lea mxd, [mxq*3] mova m3, [filter_h6_shuf2] mova m4, [filter_h6_shuf3] %ifdef PIC - lea r11, [sixtap_filter_hb_m] + lea picregq, [sixtap_filter_hb_m] %endif - mova m5, [sixtap_filter_hb+r5*8-48] ; set up 6tap filter in bytes - mova m6, [sixtap_filter_hb+r5*8-32] - mova m7, [sixtap_filter_hb+r5*8-16] + mova m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes + mova m6, [sixtap_filter_hb+mxq*8-32] + mova m7, [sixtap_filter_hb+mxq*8-16] .nextrow - movu m0, [r2-2] + movu m0, [srcq-2] mova m1, m0 mova m2, m0 %if mmsize == 8 ; For epel4, we need 9 bytes, but only 8 get loaded; to compensate, do the ; shuffle with a memory operand - punpcklbw m0, [r2+3] + punpcklbw m0, [srcq+3] %else pshufb m0, [filter_h6_shuf1] %endif @@ -206,28 +208,28 @@ cglobal put_vp8_epel%1_h6, 6, 6, 8 paddsw m0, [pw_64] psraw m0, 7 packuswb m0, m0 - movh [r0], m0 ; store + movh [dstq], m0 ; store ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET -cglobal put_vp8_epel%1_h4, 6, 6, 7 - shl r5d, 4 +cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 mova m2, [pw_64] mova m3, [filter_h2_shuf] mova m4, [filter_h4_shuf] %ifdef PIC - lea r11, [fourtap_filter_hb_m] + lea picregq, [fourtap_filter_hb_m] %endif - mova m5, [fourtap_filter_hb+r5-16] ; set up 4tap filter in bytes - mova m6, [fourtap_filter_hb+r5] + mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes + mova m6, [fourtap_filter_hb+mxq] .nextrow - movu m0, [r2-1] + movu m0, [srcq-1] mova m1, m0 pshufb m0, m3 pshufb m1, m4 @@ -237,33 +239,33 @@ cglobal put_vp8_epel%1_h4, 6, 6, 7 paddsw m0, m1 psraw m0, 7 packuswb m0, m0 - movh [r0], m0 ; store + movh [dstq], m0 ; store ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET -cglobal put_vp8_epel%1_v4, 7, 7, 8 - shl r6d, 4 +cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 %ifdef PIC - lea r11, [fourtap_filter_hb_m] + lea picregq, [fourtap_filter_hb_m] %endif - mova m5, [fourtap_filter_hb+r6-16] - mova m6, [fourtap_filter_hb+r6] + mova m5, [fourtap_filter_hb+myq-16] + mova m6, [fourtap_filter_hb+myq] mova m7, [pw_64] ; read 3 lines - sub r2, r3 - movh m0, [r2] - movh m1, [r2+ r3] - movh m2, [r2+2*r3] - add r2, r3 + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+ srcstrideq] + movh m2, [srcq+2*srcstrideq] + add srcq, srcstrideq .nextrow - movh m3, [r2+2*r3] ; read new row + movh m3, [srcq+2*srcstrideq] ; read new row mova m4, m0 mova m0, m1 punpcklbw m4, m1 @@ -276,44 +278,44 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8 paddsw m4, m7 psraw m4, 7 packuswb m4, m4 - movh [r0], m4 + movh [dstq], m4 ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET -cglobal put_vp8_epel%1_v6, 7, 7, 8 - lea r6d, [r6*3] +cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + lea myd, [myq*3] %ifdef PIC - lea r11, [sixtap_filter_hb_m] + lea picregq, [sixtap_filter_hb_m] %endif - lea r6, [sixtap_filter_hb+r6*8] + lea myq, [sixtap_filter_hb+myq*8] ; read 5 lines - sub r2, r3 - sub r2, r3 - movh m0, [r2] - movh m1, [r2+r3] - movh m2, [r2+r3*2] - lea r2, [r2+r3*2] - add r2, r3 - movh m3, [r2] - movh m4, [r2+r3] + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] .nextrow - movh m5, [r2+2*r3] ; read new row + movh m5, [srcq+2*srcstrideq] ; read new row mova m6, m0 punpcklbw m6, m5 mova m0, m1 punpcklbw m1, m2 mova m7, m3 punpcklbw m7, m4 - pmaddubsw m6, [r6-48] - pmaddubsw m1, [r6-32] - pmaddubsw m7, [r6-16] + pmaddubsw m6, [myq-48] + pmaddubsw m1, [myq-32] + pmaddubsw m7, [myq-16] paddsw m6, m1 paddsw m6, m7 mova m1, m2 @@ -323,12 +325,12 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8 mova m3, m4 packuswb m6, m6 mova m4, m5 - movh [r0], m6 + movh [dstq], m6 ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET %endmacro @@ -340,18 +342,18 @@ FILTER_SSSE3 8 ; 4x4 block, H-only 4-tap filter INIT_MMX mmx2 -cglobal put_vp8_epel4_h4, 6, 6 - shl r5d, 4 +cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 %ifdef PIC - lea r11, [fourtap_filter_hw_m] + lea picregq, [fourtap_filter_hw_m] %endif - movq mm4, [fourtap_filter_hw+r5-16] ; set up 4tap filter in words - movq mm5, [fourtap_filter_hw+r5] + movq mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words + movq mm5, [fourtap_filter_hw+mxq] movq mm7, [pw_64] pxor mm6, mm6 .nextrow - movq mm1, [r2-1] ; (ABCDEFGH) load 8 horizontal pixels + movq mm1, [srcq-1] ; (ABCDEFGH) load 8 horizontal pixels ; first set of 2 pixels movq mm2, mm1 ; byte ABCD.. @@ -377,30 +379,30 @@ cglobal put_vp8_epel4_h4, 6, 6 paddsw mm3, mm7 ; rounding psraw mm3, 7 packuswb mm3, mm6 ; clip and word->bytes - movd [r0], mm3 ; store + movd [dstq], mm3 ; store ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET ; 4x4 block, H-only 6-tap filter INIT_MMX mmx2 -cglobal put_vp8_epel4_h6, 6, 6 - lea r5d, [r5*3] +cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg + lea mxd, [mxq*3] %ifdef PIC - lea r11, [sixtap_filter_hw_m] + lea picregq, [sixtap_filter_hw_m] %endif - movq mm4, [sixtap_filter_hw+r5*8-48] ; set up 4tap filter in words - movq mm5, [sixtap_filter_hw+r5*8-32] - movq mm6, [sixtap_filter_hw+r5*8-16] + movq mm4, [sixtap_filter_hw+mxq*8-48] ; set up 4tap filter in words + movq mm5, [sixtap_filter_hw+mxq*8-32] + movq mm6, [sixtap_filter_hw+mxq*8-16] movq mm7, [pw_64] pxor mm3, mm3 .nextrow - movq mm1, [r2-2] ; (ABCDEFGH) load 8 horizontal pixels + movq mm1, [srcq-2] ; (ABCDEFGH) load 8 horizontal pixels ; first set of 2 pixels movq mm2, mm1 ; byte ABCD.. @@ -420,7 +422,7 @@ cglobal put_vp8_epel4_h6, 6, 6 paddd mm1, mm2 ; finish 1st 2px ; second set of 2 pixels, use backup of above - movd mm2, [r2+3] ; byte FGHI (prevent overreads) + movd mm2, [srcq+3] ; byte FGHI (prevent overreads) pmaddwd mm0, mm4 ; multiply 1st backed up 2px with F0/F1 pmaddwd mm3, mm5 ; multiply 2nd backed up 2px with F2/F3 paddd mm0, mm3 ; add to 2nd 2px cache @@ -435,35 +437,35 @@ cglobal put_vp8_epel4_h6, 6, 6 paddsw mm1, mm7 ; rounding psraw mm1, 7 packuswb mm1, mm3 ; clip and word->bytes - movd [r0], mm1 ; store + movd [dstq], mm1 ; store ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET INIT_XMM sse2 -cglobal put_vp8_epel8_h4, 6, 6, 10 - shl r5d, 5 +cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 5 %ifdef PIC - lea r11, [fourtap_filter_v_m] + lea picregq, [fourtap_filter_v_m] %endif - lea r5, [fourtap_filter_v+r5-32] + lea mxq, [fourtap_filter_v+mxq-32] pxor m7, m7 mova m4, [pw_64] - mova m5, [r5+ 0] - mova m6, [r5+16] + mova m5, [mxq+ 0] + mova m6, [mxq+16] %ifdef m8 - mova m8, [r5+32] - mova m9, [r5+48] + mova m8, [mxq+32] + mova m9, [mxq+48] %endif .nextrow - movq m0, [r2-1] - movq m1, [r2-0] - movq m2, [r2+1] - movq m3, [r2+2] + movq m0, [srcq-1] + movq m1, [srcq-0] + movq m2, [srcq+1] + movq m3, [srcq+2] punpcklbw m0, m7 punpcklbw m1, m7 punpcklbw m2, m7 @@ -474,8 +476,8 @@ cglobal put_vp8_epel8_h4, 6, 6, 10 pmullw m2, m8 pmullw m3, m9 %else - pmullw m2, [r5+32] - pmullw m3, [r5+48] + pmullw m2, [mxq+32] + pmullw m3, [mxq+48] %endif paddsw m0, m1 paddsw m2, m3 @@ -483,40 +485,40 @@ cglobal put_vp8_epel8_h4, 6, 6, 10 paddsw m0, m4 psraw m0, 7 packuswb m0, m7 - movh [r0], m0 ; store + movh [dstq], m0 ; store ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET INIT_XMM sse2 -cglobal put_vp8_epel8_h6, 6, 6, 14 - lea r5d, [r5*3] - shl r5d, 4 +cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg + lea mxd, [mxq*3] + shl mxd, 4 %ifdef PIC - lea r11, [sixtap_filter_v_m] + lea picregq, [sixtap_filter_v_m] %endif - lea r5, [sixtap_filter_v+r5-96] + lea mxq, [sixtap_filter_v+mxq-96] pxor m7, m7 mova m6, [pw_64] %ifdef m8 - mova m8, [r5+ 0] - mova m9, [r5+16] - mova m10, [r5+32] - mova m11, [r5+48] - mova m12, [r5+64] - mova m13, [r5+80] + mova m8, [mxq+ 0] + mova m9, [mxq+16] + mova m10, [mxq+32] + mova m11, [mxq+48] + mova m12, [mxq+64] + mova m13, [mxq+80] %endif .nextrow - movq m0, [r2-2] - movq m1, [r2-1] - movq m2, [r2-0] - movq m3, [r2+1] - movq m4, [r2+2] - movq m5, [r2+3] + movq m0, [srcq-2] + movq m1, [srcq-1] + movq m2, [srcq-0] + movq m3, [srcq+1] + movq m4, [srcq+2] + movq m5, [srcq+3] punpcklbw m0, m7 punpcklbw m1, m7 punpcklbw m2, m7 @@ -531,12 +533,12 @@ cglobal put_vp8_epel8_h6, 6, 6, 14 pmullw m4, m12 pmullw m5, m13 %else - pmullw m0, [r5+ 0] - pmullw m1, [r5+16] - pmullw m2, [r5+32] - pmullw m3, [r5+48] - pmullw m4, [r5+64] - pmullw m5, [r5+80] + pmullw m0, [mxq+ 0] + pmullw m1, [mxq+16] + pmullw m2, [mxq+32] + pmullw m3, [mxq+48] + pmullw m4, [mxq+64] + pmullw m5, [mxq+80] %endif paddsw m1, m4 paddsw m0, m5 @@ -546,52 +548,52 @@ cglobal put_vp8_epel8_h6, 6, 6, 14 paddsw m0, m6 psraw m0, 7 packuswb m0, m7 - movh [r0], m0 ; store + movh [dstq], m0 ; store ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET %macro FILTER_V 1 ; 4x4 block, V-only 4-tap filter -cglobal put_vp8_epel%1_v4, 7, 7, 8 - shl r6d, 5 +cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + shl myd, 5 %ifdef PIC - lea r11, [fourtap_filter_v_m] + lea picregq, [fourtap_filter_v_m] %endif - lea r6, [fourtap_filter_v+r6-32] + lea myq, [fourtap_filter_v+myq-32] mova m6, [pw_64] pxor m7, m7 - mova m5, [r6+48] + mova m5, [myq+48] ; read 3 lines - sub r2, r3 - movh m0, [r2] - movh m1, [r2+ r3] - movh m2, [r2+2*r3] - add r2, r3 + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+ srcstrideq] + movh m2, [srcq+2*srcstrideq] + add srcq, srcstrideq punpcklbw m0, m7 punpcklbw m1, m7 punpcklbw m2, m7 .nextrow ; first calculate negative taps (to prevent losing positive overflows) - movh m4, [r2+2*r3] ; read new row + movh m4, [srcq+2*srcstrideq] ; read new row punpcklbw m4, m7 mova m3, m4 - pmullw m0, [r6+0] + pmullw m0, [myq+0] pmullw m4, m5 paddsw m4, m0 ; then calculate positive taps mova m0, m1 - pmullw m1, [r6+16] + pmullw m1, [myq+16] paddsw m4, m1 mova m1, m2 - pmullw m2, [r6+32] + pmullw m2, [myq+32] paddsw m4, m2 mova m2, m3 @@ -599,36 +601,36 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8 paddsw m4, m6 psraw m4, 7 packuswb m4, m7 - movh [r0], m4 + movh [dstq], m4 ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET ; 4x4 block, V-only 6-tap filter -cglobal put_vp8_epel%1_v6, 7, 7, 8 - shl r6d, 4 - lea r6, [r6*3] +cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 + lea myq, [myq*3] %ifdef PIC - lea r11, [sixtap_filter_v_m] + lea picregq, [sixtap_filter_v_m] %endif - lea r6, [sixtap_filter_v+r6-96] + lea myq, [sixtap_filter_v+myq-96] pxor m7, m7 ; read 5 lines - sub r2, r3 - sub r2, r3 - movh m0, [r2] - movh m1, [r2+r3] - movh m2, [r2+r3*2] - lea r2, [r2+r3*2] - add r2, r3 - movh m3, [r2] - movh m4, [r2+r3] + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] punpcklbw m0, m7 punpcklbw m1, m7 punpcklbw m2, m7 @@ -638,38 +640,38 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8 .nextrow ; first calculate negative taps (to prevent losing positive overflows) mova m5, m1 - pmullw m5, [r6+16] + pmullw m5, [myq+16] mova m6, m4 - pmullw m6, [r6+64] + pmullw m6, [myq+64] paddsw m6, m5 ; then calculate positive taps - movh m5, [r2+2*r3] ; read new row + movh m5, [srcq+2*srcstrideq] ; read new row punpcklbw m5, m7 - pmullw m0, [r6+0] + pmullw m0, [myq+0] paddsw m6, m0 mova m0, m1 mova m1, m2 - pmullw m2, [r6+32] + pmullw m2, [myq+32] paddsw m6, m2 mova m2, m3 - pmullw m3, [r6+48] + pmullw m3, [myq+48] paddsw m6, m3 mova m3, m4 mova m4, m5 - pmullw m5, [r6+80] + pmullw m5, [myq+80] paddsw m6, m5 ; round/clip/store paddsw m6, [pw_64] psraw m6, 7 packuswb m6, m7 - movh [r0], m6 + movh [dstq], m6 ; go to next line - add r0, r1 - add r2, r3 - dec r4d ; next row + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row jg .nextrow REP_RET %endmacro @@ -680,20 +682,19 @@ INIT_XMM sse2 FILTER_V 8 %macro FILTER_BILINEAR 1 -cglobal put_vp8_bilinear%1_v, 7, 7, 7 - mov r5d, 8*16 - shl r6d, 4 - sub r5d, r6d +cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 %ifdef PIC - lea r11, [bilinear_filter_vw_m] + lea picregq, [bilinear_filter_vw_m] %endif pxor m6, m6 - mova m4, [bilinear_filter_vw+r5-16] - mova m5, [bilinear_filter_vw+r6-16] + mova m5, [bilinear_filter_vw+myq-1*16] + neg myq + mova m4, [bilinear_filter_vw+myq+7*16] .nextrow - movh m0, [r2+r3*0] - movh m1, [r2+r3*1] - movh m3, [r2+r3*2] + movh m0, [srcq+srcstrideq*0] + movh m1, [srcq+srcstrideq*1] + movh m3, [srcq+srcstrideq*2] punpcklbw m0, m6 punpcklbw m1, m6 punpcklbw m3, m6 @@ -711,35 +712,34 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7 %if mmsize == 8 packuswb m0, m0 packuswb m2, m2 - movh [r0+r1*0], m0 - movh [r0+r1*1], m2 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m2 %else packuswb m0, m2 - movh [r0+r1*0], m0 - movhps [r0+r1*1], m0 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 %endif - lea r0, [r0+r1*2] - lea r2, [r2+r3*2] - sub r4d, 2 + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 jg .nextrow REP_RET -cglobal put_vp8_bilinear%1_h, 7, 7, 7 - mov r6d, 8*16 - shl r5d, 4 - sub r6d, r5d +cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 %ifdef PIC - lea r11, [bilinear_filter_vw_m] + lea picregq, [bilinear_filter_vw_m] %endif pxor m6, m6 - mova m4, [bilinear_filter_vw+r6-16] - mova m5, [bilinear_filter_vw+r5-16] + mova m5, [bilinear_filter_vw+mxq-1*16] + neg mxq + mova m4, [bilinear_filter_vw+mxq+7*16] .nextrow - movh m0, [r2+r3*0+0] - movh m1, [r2+r3*0+1] - movh m2, [r2+r3*1+0] - movh m3, [r2+r3*1+1] + movh m0, [srcq+srcstrideq*0+0] + movh m1, [srcq+srcstrideq*0+1] + movh m2, [srcq+srcstrideq*1+0] + movh m3, [srcq+srcstrideq*1+1] punpcklbw m0, m6 punpcklbw m1, m6 punpcklbw m2, m6 @@ -757,17 +757,17 @@ cglobal put_vp8_bilinear%1_h, 7, 7, 7 %if mmsize == 8 packuswb m0, m0 packuswb m2, m2 - movh [r0+r1*0], m0 - movh [r0+r1*1], m2 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m2 %else packuswb m0, m2 - movh [r0+r1*0], m0 - movhps [r0+r1*1], m0 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 %endif - lea r0, [r0+r1*2] - lea r2, [r2+r3*2] - sub r4d, 2 + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 jg .nextrow REP_RET %endmacro @@ -778,17 +778,17 @@ INIT_XMM sse2 FILTER_BILINEAR 8 %macro FILTER_BILINEAR_SSSE3 1 -cglobal put_vp8_bilinear%1_v, 7, 7, 5 - shl r6d, 4 +cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 %ifdef PIC - lea r11, [bilinear_filter_vb_m] + lea picregq, [bilinear_filter_vb_m] %endif pxor m4, m4 - mova m3, [bilinear_filter_vb+r6-16] + mova m3, [bilinear_filter_vb+myq-16] .nextrow - movh m0, [r2+r3*0] - movh m1, [r2+r3*1] - movh m2, [r2+r3*2] + movh m0, [srcq+srcstrideq*0] + movh m1, [srcq+srcstrideq*1] + movh m2, [srcq+srcstrideq*2] punpcklbw m0, m1 punpcklbw m1, m2 pmaddubsw m0, m3 @@ -800,31 +800,31 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 5 %if mmsize==8 packuswb m0, m0 packuswb m1, m1 - movh [r0+r1*0], m0 - movh [r0+r1*1], m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 %else packuswb m0, m1 - movh [r0+r1*0], m0 - movhps [r0+r1*1], m0 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 %endif - lea r0, [r0+r1*2] - lea r2, [r2+r3*2] - sub r4d, 2 + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 jg .nextrow REP_RET -cglobal put_vp8_bilinear%1_h, 7, 7, 5 - shl r5d, 4 +cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 %ifdef PIC - lea r11, [bilinear_filter_vb_m] + lea picregq, [bilinear_filter_vb_m] %endif pxor m4, m4 mova m2, [filter_h2_shuf] - mova m3, [bilinear_filter_vb+r5-16] + mova m3, [bilinear_filter_vb+mxq-16] .nextrow - movu m0, [r2+r3*0] - movu m1, [r2+r3*1] + movu m0, [srcq+srcstrideq*0] + movu m1, [srcq+srcstrideq*1] pshufb m0, m2 pshufb m1, m2 pmaddubsw m0, m3 @@ -836,17 +836,17 @@ cglobal put_vp8_bilinear%1_h, 7, 7, 5 %if mmsize==8 packuswb m0, m0 packuswb m1, m1 - movh [r0+r1*0], m0 - movh [r0+r1*1], m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 %else packuswb m0, m1 - movh [r0+r1*0], m0 - movhps [r0+r1*1], m0 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 %endif - lea r0, [r0+r1*2] - lea r2, [r2+r3*2] - sub r4d, 2 + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 jg .nextrow REP_RET %endmacro @@ -857,47 +857,47 @@ INIT_XMM ssse3 FILTER_BILINEAR_SSSE3 8 INIT_MMX mmx -cglobal put_vp8_pixels8, 5,5 +cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height .nextrow: - movq mm0, [r2+r3*0] - movq mm1, [r2+r3*1] - lea r2, [r2+r3*2] - movq [r0+r1*0], mm0 - movq [r0+r1*1], mm1 - lea r0, [r0+r1*2] - sub r4d, 2 + movq mm0, [srcq+srcstrideq*0] + movq mm1, [srcq+srcstrideq*1] + lea srcq, [srcq+srcstrideq*2] + movq [dstq+dststrideq*0], mm0 + movq [dstq+dststrideq*1], mm1 + lea dstq, [dstq+dststrideq*2] + sub heightd, 2 jg .nextrow REP_RET %if ARCH_X86_32 INIT_MMX mmx -cglobal put_vp8_pixels16, 5,5 +cglobal put_vp8_pixels16, 5, 5, 0, dst, dststride, src, srcstride, height .nextrow: - movq mm0, [r2+r3*0+0] - movq mm1, [r2+r3*0+8] - movq mm2, [r2+r3*1+0] - movq mm3, [r2+r3*1+8] - lea r2, [r2+r3*2] - movq [r0+r1*0+0], mm0 - movq [r0+r1*0+8], mm1 - movq [r0+r1*1+0], mm2 - movq [r0+r1*1+8], mm3 - lea r0, [r0+r1*2] - sub r4d, 2 + movq mm0, [srcq+srcstrideq*0+0] + movq mm1, [srcq+srcstrideq*0+8] + movq mm2, [srcq+srcstrideq*1+0] + movq mm3, [srcq+srcstrideq*1+8] + lea srcq, [srcq+srcstrideq*2] + movq [dstq+dststrideq*0+0], mm0 + movq [dstq+dststrideq*0+8], mm1 + movq [dstq+dststrideq*1+0], mm2 + movq [dstq+dststrideq*1+8], mm3 + lea dstq, [dstq+dststrideq*2] + sub heightd, 2 jg .nextrow REP_RET %endif INIT_XMM sse -cglobal put_vp8_pixels16, 5,5,2 +cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height .nextrow: - movups xmm0, [r2+r3*0] - movups xmm1, [r2+r3*1] - lea r2, [r2+r3*2] - movaps [r0+r1*0], xmm0 - movaps [r0+r1*1], xmm1 - lea r0, [r0+r1*2] - sub r4d, 2 + movups xmm0, [srcq+srcstrideq*0] + movups xmm1, [srcq+srcstrideq*1] + lea srcq, [srcq+srcstrideq*2] + movaps [dstq+dststrideq*0], xmm0 + movaps [dstq+dststrideq*1], xmm1 + lea dstq, [dstq+dststrideq*2] + sub heightd, 2 jg .nextrow REP_RET -- cgit v1.2.3 From 8476ca3b4ee5455eceb6ea6e58ea8200d63dff9d Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 3 Mar 2012 08:04:40 -0800 Subject: vp8: convert idct x86 assembly to use named arguments. --- libavcodec/x86/vp8dsp.asm | 200 ++++++++++++++++++++++++---------------------- 1 file changed, 103 insertions(+), 97 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 2f45f5a478..74456c687e 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -906,10 +906,10 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height ;----------------------------------------------------------------------------- %macro ADD_DC 4 - %4 m2, [r0+%3] - %4 m3, [r0+r2+%3] - %4 m4, [r1+%3] - %4 m5, [r1+r2+%3] + %4 m2, [dst1q+%3] + %4 m3, [dst1q+strideq+%3] + %4 m4, [dst2q+%3] + %4 m5, [dst2q+strideq+%3] paddusb m2, %1 paddusb m3, %1 paddusb m4, %1 @@ -918,22 +918,22 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height psubusb m3, %2 psubusb m4, %2 psubusb m5, %2 - %4 [r0+%3], m2 - %4 [r0+r2+%3], m3 - %4 [r1+%3], m4 - %4 [r1+r2+%3], m5 + %4 [dst1q+%3], m2 + %4 [dst1q+strideq+%3], m3 + %4 [dst2q+%3], m4 + %4 [dst2q+strideq+%3], m5 %endmacro INIT_MMX mmx -cglobal vp8_idct_dc_add, 3, 3 +cglobal vp8_idct_dc_add, 3, 3, 0, dst, block, stride ; load data - movd m0, [r1] + movd m0, [blockq] ; calculate DC paddw m0, [pw_4] pxor m1, m1 psraw m0, 3 - movd [r1], m1 + movd [blockq], m1 psubw m1, m0 packuswb m0, m0 packuswb m1, m1 @@ -943,24 +943,26 @@ cglobal vp8_idct_dc_add, 3, 3 punpcklwd m1, m1 ; add DC - lea r1, [r0+r2*2] + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] ADD_DC m0, m1, 0, movh RET INIT_XMM sse4 -cglobal vp8_idct_dc_add, 3, 3, 6 +cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride ; load data - movd m0, [r1] + movd m0, [blockq] pxor m1, m1 ; calculate DC paddw m0, [pw_4] - movd [r1], m1 - lea r1, [r0+r2*2] - movd m2, [r0] - movd m3, [r0+r2] - movd m4, [r1] - movd m5, [r1+r2] + movd [blockq], m1 + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] + movd m2, [dst1q] + movd m3, [dst1q+strideq] + movd m4, [dst2q] + movd m5, [dst2q+strideq] psraw m0, 3 pshuflw m0, m0, 0 punpcklqdq m0, m0 @@ -971,10 +973,10 @@ cglobal vp8_idct_dc_add, 3, 3, 6 paddw m2, m0 paddw m4, m0 packuswb m2, m4 - movd [r0], m2 - pextrd [r0+r2], m2, 1 - pextrd [r1], m2, 2 - pextrd [r1+r2], m2, 3 + movd [dst1q], m2 + pextrd [dst1q+strideq], m2, 1 + pextrd [dst2q], m2, 2 + pextrd [dst2q+strideq], m2, 3 RET ;----------------------------------------------------------------------------- @@ -983,21 +985,21 @@ cglobal vp8_idct_dc_add, 3, 3, 6 %if ARCH_X86_32 INIT_MMX mmx -cglobal vp8_idct_dc_add4y, 3, 3 +cglobal vp8_idct_dc_add4y, 3, 3, 0, dst, block, stride ; load data - movd m0, [r1+32*0] ; A - movd m1, [r1+32*2] ; C - punpcklwd m0, [r1+32*1] ; A B - punpcklwd m1, [r1+32*3] ; C D + movd m0, [blockq+32*0] ; A + movd m1, [blockq+32*2] ; C + punpcklwd m0, [blockq+32*1] ; A B + punpcklwd m1, [blockq+32*3] ; C D punpckldq m0, m1 ; A B C D pxor m6, m6 ; calculate DC paddw m0, [pw_4] - movd [r1+32*0], m6 - movd [r1+32*1], m6 - movd [r1+32*2], m6 - movd [r1+32*3], m6 + movd [blockq+32*0], m6 + movd [blockq+32*1], m6 + movd [blockq+32*2], m6 + movd [blockq+32*3], m6 psraw m0, 3 psubw m6, m0 packuswb m0, m0 @@ -1012,28 +1014,29 @@ cglobal vp8_idct_dc_add4y, 3, 3 punpckhbw m7, m7 ; CCCCDDDD ; add DC - lea r1, [r0+r2*2] + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] ADD_DC m0, m6, 0, mova ADD_DC m1, m7, 8, mova RET %endif INIT_XMM sse2 -cglobal vp8_idct_dc_add4y, 3, 3, 6 +cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride ; load data - movd m0, [r1+32*0] ; A - movd m1, [r1+32*2] ; C - punpcklwd m0, [r1+32*1] ; A B - punpcklwd m1, [r1+32*3] ; C D + movd m0, [blockq+32*0] ; A + movd m1, [blockq+32*2] ; C + punpcklwd m0, [blockq+32*1] ; A B + punpcklwd m1, [blockq+32*3] ; C D punpckldq m0, m1 ; A B C D pxor m1, m1 ; calculate DC paddw m0, [pw_4] - movd [r1+32*0], m1 - movd [r1+32*1], m1 - movd [r1+32*2], m1 - movd [r1+32*3], m1 + movd [blockq+32*0], m1 + movd [blockq+32*1], m1 + movd [blockq+32*2], m1 + movd [blockq+32*3], m1 psraw m0, 3 psubw m1, m0 packuswb m0, m0 @@ -1044,7 +1047,8 @@ cglobal vp8_idct_dc_add4y, 3, 3, 6 punpcklbw m1, m1 ; add DC - lea r1, [r0+r2*2] + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] ADD_DC m0, m1, 0, mova RET @@ -1053,21 +1057,21 @@ cglobal vp8_idct_dc_add4y, 3, 3, 6 ;----------------------------------------------------------------------------- INIT_MMX mmx -cglobal vp8_idct_dc_add4uv, 3, 3 +cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride ; load data - movd m0, [r1+32*0] ; A - movd m1, [r1+32*2] ; C - punpcklwd m0, [r1+32*1] ; A B - punpcklwd m1, [r1+32*3] ; C D + movd m0, [blockq+32*0] ; A + movd m1, [blockq+32*2] ; C + punpcklwd m0, [blockq+32*1] ; A B + punpcklwd m1, [blockq+32*3] ; C D punpckldq m0, m1 ; A B C D pxor m6, m6 ; calculate DC paddw m0, [pw_4] - movd [r1+32*0], m6 - movd [r1+32*1], m6 - movd [r1+32*2], m6 - movd [r1+32*3], m6 + movd [blockq+32*0], m6 + movd [blockq+32*1], m6 + movd [blockq+32*2], m6 + movd [blockq+32*3], m6 psraw m0, 3 psubw m6, m0 packuswb m0, m0 @@ -1082,10 +1086,11 @@ cglobal vp8_idct_dc_add4uv, 3, 3 punpckhbw m7, m7 ; CCCCDDDD ; add DC - lea r1, [r0+r2*2] + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] ADD_DC m0, m6, 0, mova - lea r0, [r0+r2*4] - lea r1, [r1+r2*4] + lea dst1q, [dst1q+strideq*4] + lea dst2q, [dst2q+strideq*4] ADD_DC m1, m7, 0, mova RET @@ -1125,24 +1130,24 @@ cglobal vp8_idct_dc_add4uv, 3, 3 %endmacro %macro VP8_IDCT_ADD 0 -cglobal vp8_idct_add, 3, 3 +cglobal vp8_idct_add, 3, 3, 0, dst, block, stride ; load block data - movq m0, [r1+ 0] - movq m1, [r1+ 8] - movq m2, [r1+16] - movq m3, [r1+24] + movq m0, [blockq+ 0] + movq m1, [blockq+ 8] + movq m2, [blockq+16] + movq m3, [blockq+24] movq m6, [pw_20091] movq m7, [pw_17734] %if cpuflag(sse) xorps xmm0, xmm0 - movaps [r1+ 0], xmm0 - movaps [r1+16], xmm0 + movaps [blockq+ 0], xmm0 + movaps [blockq+16], xmm0 %else pxor m4, m4 - movq [r1+ 0], m4 - movq [r1+ 8], m4 - movq [r1+16], m4 - movq [r1+24], m4 + movq [blockq+ 0], m4 + movq [blockq+ 8], m4 + movq [blockq+16], m4 + movq [blockq+24], m4 %endif ; actual IDCT @@ -1154,9 +1159,10 @@ cglobal vp8_idct_add, 3, 3 ; store pxor m4, m4 - lea r1, [r0+2*r2] - STORE_DIFFx2 m0, m1, m6, m7, m4, 3, r0, r2 - STORE_DIFFx2 m2, m3, m6, m7, m4, 3, r1, r2 + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+2*strideq] + STORE_DIFFx2 m0, m1, m6, m7, m4, 3, dst1q, strideq + STORE_DIFFx2 m2, m3, m6, m7, m4, 3, dst2q, strideq RET %endmacro @@ -1173,24 +1179,24 @@ VP8_IDCT_ADD ;----------------------------------------------------------------------------- %macro SCATTER_WHT 3 - movd r1d, m%1 - movd r2d, m%2 - mov [r0+2*16*(0+%3)], r1w - mov [r0+2*16*(1+%3)], r2w - shr r1d, 16 - shr r2d, 16 + movd dc1d, m%1 + movd dc2d, m%2 + mov [blockq+2*16*(0+%3)], dc1w + mov [blockq+2*16*(1+%3)], dc2w + shr dc1d, 16 + shr dc2d, 16 psrlq m%1, 32 psrlq m%2, 32 - mov [r0+2*16*(4+%3)], r1w - mov [r0+2*16*(5+%3)], r2w - movd r1d, m%1 - movd r2d, m%2 - mov [r0+2*16*(8+%3)], r1w - mov [r0+2*16*(9+%3)], r2w - shr r1d, 16 - shr r2d, 16 - mov [r0+2*16*(12+%3)], r1w - mov [r0+2*16*(13+%3)], r2w + mov [blockq+2*16*(4+%3)], dc1w + mov [blockq+2*16*(5+%3)], dc2w + movd dc1d, m%1 + movd dc2d, m%2 + mov [blockq+2*16*(8+%3)], dc1w + mov [blockq+2*16*(9+%3)], dc2w + shr dc1d, 16 + shr dc2d, 16 + mov [blockq+2*16*(12+%3)], dc1w + mov [blockq+2*16*(13+%3)], dc2w %endmacro %macro HADAMARD4_1D 4 @@ -1200,21 +1206,21 @@ VP8_IDCT_ADD %endmacro %macro VP8_DC_WHT 0 -cglobal vp8_luma_dc_wht, 2, 3 - movq m0, [r1] - movq m1, [r1+8] - movq m2, [r1+16] - movq m3, [r1+24] +cglobal vp8_luma_dc_wht, 2, 3, 0, block, dc1, dc2 + movq m0, [dc1q] + movq m1, [dc1q+8] + movq m2, [dc1q+16] + movq m3, [dc1q+24] %if cpuflag(sse) xorps xmm0, xmm0 - movaps [r1+ 0], xmm0 - movaps [r1+16], xmm0 + movaps [dc1q+ 0], xmm0 + movaps [dc1q+16], xmm0 %else pxor m4, m4 - movq [r1+ 0], m4 - movq [r1+ 8], m4 - movq [r1+16], m4 - movq [r1+24], m4 + movq [dc1q+ 0], m4 + movq [dc1q+ 8], m4 + movq [dc1q+16], m4 + movq [dc1q+24], m4 %endif HADAMARD4_1D 0, 1, 2, 3 TRANSPOSE4x4W 0, 1, 2, 3, 4 -- cgit v1.2.3 From b4188f0d4688477d1f72914105a485558d86662b Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 3 Mar 2012 12:55:34 -0800 Subject: vp8: convert simple loopfilter x86 assembly to use named arguments. --- libavcodec/x86/vp8dsp.asm | 55 ++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 74456c687e..8e13560b9e 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -1489,20 +1489,25 @@ VP8_DC_WHT %endmacro %macro SIMPLE_LOOPFILTER 2 -cglobal vp8_%1_loop_filter_simple, 3, %2, 8 +cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr %if mmsize == 8 ; mmx/mmxext - mov r3, 2 + mov cntrq, 2 %endif %if cpuflag(ssse3) pxor m0, m0 %endif - SPLATB_REG m7, r2, m0 ; splat "flim" into register + SPLATB_REG m7, flim, m0 ; splat "flim" into register ; set up indexes to address 4 rows - mov r2, r1 - neg r1 +%if mmsize == 8 + DEFINE_ARGS dst1, mstride, stride, cntr, dst2 +%else + DEFINE_ARGS dst1, mstride, stride, dst3, dst2 +%endif + mov strideq, mstrideq + neg mstrideq %ifidn %1, h - lea r0, [r0+4*r2-2] + lea dst1q, [dst1q+4*strideq-2] %endif %if mmsize == 8 ; mmx / mmxext @@ -1510,17 +1515,17 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8 %endif %ifidn %1, v ; read 4 half/full rows of pixels - mova m0, [r0+r1*2] ; p1 - mova m1, [r0+r1] ; p0 - mova m2, [r0] ; q0 - mova m3, [r0+r2] ; q1 + mova m0, [dst1q+mstrideq*2] ; p1 + mova m1, [dst1q+mstrideq] ; p0 + mova m2, [dst1q] ; q0 + mova m3, [dst1q+ strideq] ; q1 %else ; h - lea r4, [r0+r2] + lea dst2q, [dst1q+ strideq] %if mmsize == 8 ; mmx/mmxext - READ_8x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, r0, r4, r1, r2 + READ_8x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq %else ; sse2 - READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, r0, r4, r1, r2, r3 + READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq, dst3q %endif TRANSPOSE4x4W 0, 1, 2, 3, 4 %endif @@ -1590,35 +1595,35 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8 ; store %ifidn %1, v - mova [r0], m4 - mova [r0+r1], m6 + mova [dst1q], m4 + mova [dst1q+mstrideq], m6 %else ; h - inc r0 + inc dst1q SBUTTERFLY bw, 6, 4, 0 %if mmsize == 16 ; sse2 %if cpuflag(sse4) - inc r4 + inc dst2q %endif - WRITE_8W m6, r4, r0, r1, r2 - lea r4, [r3+r1+1] + WRITE_8W m6, dst2q, dst1q, mstrideq, strideq + lea dst2q, [dst3q+mstrideq+1] %if cpuflag(sse4) - inc r3 + inc dst3q %endif - WRITE_8W m4, r3, r4, r1, r2 + WRITE_8W m4, dst3q, dst2q, mstrideq, strideq %else ; mmx/mmxext - WRITE_2x4W m6, m4, r4, r0, r1, r2 + WRITE_2x4W m6, m4, dst2q, dst1q, mstrideq, strideq %endif %endif %if mmsize == 8 ; mmx/mmxext ; next 8 pixels %ifidn %1, v - add r0, 8 ; advance 8 cols = pixels + add dst1q, 8 ; advance 8 cols = pixels %else ; h - lea r0, [r0+r2*8-1] ; advance 8 rows = lines + lea dst1q, [dst1q+strideq*8-1] ; advance 8 rows = lines %endif - dec r3 + dec cntrq jg .next8px REP_RET %else ; sse2 -- cgit v1.2.3 From 3e9cd8b4b0b7b5cd5c1c2119da7b3e7d4c1fb86a Mon Sep 17 00:00:00 2001 From: Aneesh Dogra Date: Sun, 4 Mar 2012 09:59:43 +0530 Subject: qpeg: Use bytestream2 functions to prevent buffer overreads. Signed-off-by: Ronald S. Bultje --- libavcodec/qpeg.c | 87 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 43 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/qpeg.c b/libavcodec/qpeg.c index 0f1bcd7ac9..f8cbef37f2 100644 --- a/libavcodec/qpeg.c +++ b/libavcodec/qpeg.c @@ -25,16 +25,18 @@ */ #include "avcodec.h" +#include "bytestream.h" typedef struct QpegContext{ AVCodecContext *avctx; AVFrame pic; uint8_t *refdata; uint32_t pal[256]; + GetByteContext buffer; } QpegContext; -static void qpeg_decode_intra(const uint8_t *src, uint8_t *dst, int size, - int stride, int width, int height) +static void qpeg_decode_intra(QpegContext *qctx, uint8_t *dst, + int stride, int width, int height) { int i; int code; @@ -47,31 +49,26 @@ static void qpeg_decode_intra(const uint8_t *src, uint8_t *dst, int size, height--; dst = dst + height * stride; - while((size > 0) && (rows_to_go > 0)) { - code = *src++; - size--; + while ((bytestream2_get_bytes_left(&qctx->buffer) > 0) && (rows_to_go > 0)) { + code = bytestream2_get_byte(&qctx->buffer); run = copy = 0; if(code == 0xFC) /* end-of-picture code */ break; if(code >= 0xF8) { /* very long run */ - c0 = *src++; - c1 = *src++; - size -= 2; + c0 = bytestream2_get_byte(&qctx->buffer); + c1 = bytestream2_get_byte(&qctx->buffer); run = ((code & 0x7) << 16) + (c0 << 8) + c1 + 2; } else if (code >= 0xF0) { /* long run */ - c0 = *src++; - size--; + c0 = bytestream2_get_byte(&qctx->buffer); run = ((code & 0xF) << 8) + c0 + 2; } else if (code >= 0xE0) { /* short run */ run = (code & 0x1F) + 2; } else if (code >= 0xC0) { /* very long copy */ - c0 = *src++; - c1 = *src++; - size -= 2; + c0 = bytestream2_get_byte(&qctx->buffer); + c1 = bytestream2_get_byte(&qctx->buffer); copy = ((code & 0x3F) << 16) + (c0 << 8) + c1 + 1; } else if (code >= 0x80) { /* long copy */ - c0 = *src++; - size--; + c0 = bytestream2_get_byte(&qctx->buffer); copy = ((code & 0x7F) << 8) + c0 + 1; } else { /* short copy */ copy = code + 1; @@ -81,8 +78,7 @@ static void qpeg_decode_intra(const uint8_t *src, uint8_t *dst, int size, if(run) { int p; - p = *src++; - size--; + p = bytestream2_get_byte(&qctx->buffer); for(i = 0; i < run; i++) { dst[filled++] = p; if (filled >= width) { @@ -94,9 +90,8 @@ static void qpeg_decode_intra(const uint8_t *src, uint8_t *dst, int size, } } } else { - size -= copy; for(i = 0; i < copy; i++) { - dst[filled++] = *src++; + dst[filled++] = bytestream2_get_byte(&qctx->buffer); if (filled >= width) { filled = 0; dst -= stride; @@ -115,9 +110,10 @@ static const int qpeg_table_w[16] = { 0x00, 0x20, 0x18, 0x08, 0x18, 0x10, 0x20, 0x10, 0x08, 0x10, 0x20, 0x20, 0x08, 0x10, 0x18, 0x04}; /* Decodes delta frames */ -static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, - int stride, int width, int height, - int delta, const uint8_t *ctable, uint8_t *refdata) +static void qpeg_decode_inter(QpegContext *qctx, uint8_t *dst, + int stride, int width, int height, + int delta, const uint8_t *ctable, + uint8_t *refdata) { int i, j; int code; @@ -132,9 +128,8 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, height--; dst = dst + height * stride; - while((size > 0) && (height >= 0)) { - code = *src++; - size--; + while ((bytestream2_get_bytes_left(&qctx->buffer) > 0) && (height >= 0)) { + code = bytestream2_get_byte(&qctx->buffer); if(delta) { /* motion compensation */ @@ -151,8 +146,7 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, me_h = qpeg_table_h[me_idx]; /* extract motion vector */ - corr = *src++; - size--; + corr = bytestream2_get_byte(&qctx->buffer); val = corr >> 4; if(val > 7) @@ -179,8 +173,7 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, } } } - code = *src++; - size--; + code = bytestream2_get_byte(&qctx->buffer); } } @@ -190,8 +183,7 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, int p; code &= 0x1F; - p = *src++; - size--; + p = bytestream2_get_byte(&qctx->buffer); for(i = 0; i <= code; i++) { dst[filled++] = p; if(filled >= width) { @@ -204,14 +196,13 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, code &= 0x1F; for(i = 0; i <= code; i++) { - dst[filled++] = *src++; + dst[filled++] = bytestream2_get_byte(&qctx->buffer); if(filled >= width) { filled = 0; dst -= stride; height--; } } - size -= code + 1; } else if(code >= 0x80) { /* skip code: 0x80..0xBF */ int skip; @@ -219,9 +210,9 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, /* codes 0x80 and 0x81 are actually escape codes, skip value minus constant is in the next byte */ if(!code) - skip = (*src++) + 64; + skip = bytestream2_get_byte(&qctx->buffer) + 64; else if(code == 1) - skip = (*src++) + 320; + skip = bytestream2_get_byte(&qctx->buffer) + 320; else skip = code; filled += skip; @@ -234,8 +225,9 @@ static void qpeg_decode_inter(const uint8_t *src, uint8_t *dst, int size, } } else { /* zero code treated as one-pixel skip */ - if(code) + if(code) { dst[filled++] = ctable[code & 0x7F]; + } else filled++; if(filled >= width) { @@ -251,25 +243,34 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { - const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; + uint8_t ctable[128]; QpegContext * const a = avctx->priv_data; AVFrame * const p = &a->pic; uint8_t* outdata; int delta; const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL); + if (avpkt->size < 0x86) { + av_log(avctx, AV_LOG_ERROR, "Packet is too small\n"); + return AVERROR_INVALIDDATA; + } + + bytestream2_init(&a->buffer, avpkt->data, avpkt->size); p->reference = 3; if (avctx->reget_buffer(avctx, p) < 0) { av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); return -1; } outdata = a->pic.data[0]; - if(buf[0x85] == 0x10) { - qpeg_decode_intra(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height); + bytestream2_skip(&a->buffer, 4); + bytestream2_get_buffer(&a->buffer, ctable, 128); + bytestream2_skip(&a->buffer, 1); + + delta = bytestream2_get_byte(&a->buffer); + if(delta == 0x10) { + qpeg_decode_intra(a, outdata, a->pic.linesize[0], avctx->width, avctx->height); } else { - delta = buf[0x85]; - qpeg_decode_inter(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height, delta, buf + 4, a->refdata); + qpeg_decode_inter(a, outdata, a->pic.linesize[0], avctx->width, avctx->height, delta, ctable, a->refdata); } /* make the palette available on the way out */ @@ -282,7 +283,7 @@ static int decode_frame(AVCodecContext *avctx, *data_size = sizeof(AVFrame); *(AVFrame*)data = a->pic; - return buf_size; + return avpkt->size; } static av_cold int decode_init(AVCodecContext *avctx){ -- cgit v1.2.3 From 6c7a01621ce0633de7a2a2ebbc0a2ccabdda3248 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Mon, 27 Feb 2012 23:32:23 -0500 Subject: nellymoserenc: use proper MDCT overlap delay --- libavcodec/nellymoserenc.c | 48 ++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c index e60b0b5afa..5848191aeb 100644 --- a/libavcodec/nellymoserenc.c +++ b/libavcodec/nellymoserenc.c @@ -52,13 +52,11 @@ typedef struct NellyMoserEncodeContext { AVCodecContext *avctx; int last_frame; - int bufsel; - int have_saved; DSPContext dsp; FFTContext mdct_ctx; DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES]; DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES]; - DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer + DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer float (*opt )[NELLY_BANDS]; uint8_t (*path)[NELLY_BANDS]; } NellyMoserEncodeContext; @@ -115,16 +113,17 @@ static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; static void apply_mdct(NellyMoserEncodeContext *s) { - s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN); - s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, - NELLY_BUF_LEN); + float *in0 = s->buf; + float *in1 = s->buf + NELLY_BUF_LEN; + float *in2 = s->buf + 2 * NELLY_BUF_LEN; + + s->dsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN); + s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN); s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); - s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, - ff_sine_128, NELLY_BUF_LEN); - s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128, - NELLY_BUF_LEN); - s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN); + s->dsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN); + s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN); + s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff); } static av_cold int encode_end(AVCodecContext *avctx) @@ -161,6 +160,7 @@ static av_cold int encode_init(AVCodecContext *avctx) } avctx->frame_size = NELLY_SAMPLES; + avctx->delay = NELLY_BUF_LEN; s->avctx = avctx; if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0) goto error; @@ -369,32 +369,26 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, voi { NellyMoserEncodeContext *s = avctx->priv_data; const float *samples = data; - int i; if (s->last_frame) return 0; + memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf)); if (data) { - memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples)); - for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) { - s->buf[s->bufsel][i] = 0; - } - s->bufsel = 1 - s->bufsel; - if (!s->have_saved) { - s->have_saved = 1; - return 0; + memcpy(s->buf + NELLY_BUF_LEN, samples, avctx->frame_size * sizeof(*s->buf)); + if (avctx->frame_size < NELLY_SAMPLES) { + memset(s->buf + NELLY_BUF_LEN + avctx->frame_size, 0, + (NELLY_SAMPLES - avctx->frame_size) * sizeof(*s->buf)); + if (avctx->frame_size >= NELLY_BUF_LEN) + s->last_frame = 1; } } else { - memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN); - s->bufsel = 1 - s->bufsel; + memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf)); s->last_frame = 1; } - if (s->have_saved) { - encode_block(s, frame, buf_size); - return NELLY_BLOCK_LEN; - } - return 0; + encode_block(s, frame, buf_size); + return NELLY_BLOCK_LEN; } AVCodec ff_nellymoser_encoder = { -- cgit v1.2.3 From 29e2c8531096a5fb67079551564b4ab3f9acd8a6 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Mon, 27 Feb 2012 23:39:50 -0500 Subject: nellymoserenc: zero any leftover packet bytes fixes writing of uninitialized packet data --- libavcodec/nellymoserenc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'libavcodec') diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c index 5848191aeb..c0b174675b 100644 --- a/libavcodec/nellymoserenc.c +++ b/libavcodec/nellymoserenc.c @@ -363,6 +363,7 @@ static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int } flush_put_bits(&pb); + memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb)); } static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) -- cgit v1.2.3 From b0350c1c30908dbe7901c4eb07663bb58e575902 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Tue, 28 Feb 2012 01:02:28 -0500 Subject: ra144enc: fix end-of-stream handling Use CODEC_CAP_DELAY and CODEC_CAP_SMALL_LAST_FRAME to properly pad and flush the encoder at the end of encoding. This is needed in order to have all input samples decoded. --- libavcodec/ra144.h | 1 + libavcodec/ra144enc.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/ra144.h b/libavcodec/ra144.h index f6475d45ff..189c73a716 100644 --- a/libavcodec/ra144.h +++ b/libavcodec/ra144.h @@ -36,6 +36,7 @@ typedef struct { AVCodecContext *avctx; AVFrame frame; LPCContext lpc_ctx; + int last_frame; unsigned int old_energy; ///< previous frame energy diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c index ff8316912f..aec2d6d586 100644 --- a/libavcodec/ra144enc.c +++ b/libavcodec/ra144enc.c @@ -53,6 +53,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx) return -1; } avctx->frame_size = NBLOCKS * BLOCKSIZE; + avctx->delay = avctx->frame_size; avctx->bit_rate = 8000; ractx = avctx->priv_data; ractx->lpc_coef[0] = ractx->lpc_tables[0]; @@ -433,7 +434,7 @@ static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame, { static const uint8_t sizes[LPC_ORDER] = {64, 32, 32, 16, 16, 8, 8, 8, 8, 4}; static const uint8_t bit_sizes[LPC_ORDER] = {6, 5, 5, 4, 4, 3, 3, 3, 3, 2}; - RA144Context *ractx; + RA144Context *ractx = avctx->priv_data; PutBitContext pb; int32_t lpc_data[NBLOCKS * BLOCKSIZE]; int32_t lpc_coefs[LPC_ORDER][MAX_LPC_ORDER]; @@ -445,11 +446,13 @@ static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame, int energy = 0; int i, idx; + if (ractx->last_frame) + return 0; + if (buf_size < FRAMESIZE) { av_log(avctx, AV_LOG_ERROR, "output buffer too small\n"); return 0; } - ractx = avctx->priv_data; /** * Since the LPC coefficients are calculated on a frame centered over the @@ -462,11 +465,15 @@ static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame, lpc_data[i] = ractx->curr_block[BLOCKSIZE + BLOCKSIZE / 2 + i]; energy += (lpc_data[i] * lpc_data[i]) >> 4; } - for (i = 2 * BLOCKSIZE + BLOCKSIZE / 2; i < NBLOCKS * BLOCKSIZE; i++) { - lpc_data[i] = *((int16_t *)data + i - 2 * BLOCKSIZE - BLOCKSIZE / 2) >> - 2; - energy += (lpc_data[i] * lpc_data[i]) >> 4; + if (data) { + int j; + for (j = 0; j < avctx->frame_size && i < NBLOCKS * BLOCKSIZE; i++, j++) { + lpc_data[i] = samples[j] >> 2; + energy += (lpc_data[i] * lpc_data[i]) >> 4; + } } + if (i < NBLOCKS * BLOCKSIZE) + memset(&lpc_data[i], 0, (NBLOCKS * BLOCKSIZE - i) * sizeof(*lpc_data)); energy = ff_energy_tab[quantize(ff_t_sqrt(energy >> 5) >> 10, ff_energy_tab, 32)]; @@ -515,8 +522,17 @@ static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame, ractx->old_energy = energy; ractx->lpc_refl_rms[1] = ractx->lpc_refl_rms[0]; FFSWAP(unsigned int *, ractx->lpc_coef[0], ractx->lpc_coef[1]); - for (i = 0; i < NBLOCKS * BLOCKSIZE; i++) - ractx->curr_block[i] = samples[i] >> 2; + + /* copy input samples to current block for processing in next call */ + i = 0; + if (data) { + for (; i < avctx->frame_size; i++) + ractx->curr_block[i] = samples[i] >> 2; + } else + ractx->last_frame = 1; + memset(&ractx->curr_block[i], 0, + (NBLOCKS * BLOCKSIZE - i) * sizeof(*ractx->curr_block)); + return FRAMESIZE; } @@ -529,6 +545,7 @@ AVCodec ff_ra_144_encoder = { .init = ra144_encode_init, .encode = ra144_encode_frame, .close = ra144_encode_close, + .capabilities = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME, .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"), -- cgit v1.2.3 From fe78470a8baf0198e9442a9eece24cc9c8462155 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Sun, 4 Mar 2012 00:25:45 -0500 Subject: libopencore-amrnbenc: fix end-of-stream handling Use CODEC_CAP_DELAY and CODEC_CAP_SMALL_LAST_FRAME to properly pad and flush the encoder at the end of encoding. This is needed in order to have all input samples decoded. --- libavcodec/libopencore-amr.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'libavcodec') diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c index ebbc0d90d7..9e2c5f160b 100644 --- a/libavcodec/libopencore-amr.c +++ b/libavcodec/libopencore-amr.c @@ -85,6 +85,7 @@ typedef struct AMRContext { int enc_bitrate; int enc_mode; int enc_dtx; + int enc_last_frame; } AMRContext; static const AVOption options[] = { @@ -195,6 +196,7 @@ static av_cold int amr_nb_encode_init(AVCodecContext *avctx) } avctx->frame_size = 160; + avctx->delay = 50; avctx->coded_frame = avcodec_alloc_frame(); if (!avctx->coded_frame) return AVERROR(ENOMEM); @@ -227,17 +229,40 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, { AMRContext *s = avctx->priv_data; int written; + int16_t *flush_buf = NULL; + const int16_t *samples = data; if (s->enc_bitrate != avctx->bit_rate) { s->enc_mode = get_bitrate_mode(avctx->bit_rate, avctx); s->enc_bitrate = avctx->bit_rate; } - written = Encoder_Interface_Encode(s->enc_state, s->enc_mode, data, + if (data) { + if (avctx->frame_size < 160) { + flush_buf = av_mallocz(160 * sizeof(*flush_buf)); + if (!flush_buf) + return AVERROR(ENOMEM); + memcpy(flush_buf, samples, avctx->frame_size * sizeof(*flush_buf)); + samples = flush_buf; + if (avctx->frame_size < 110) + s->enc_last_frame = -1; + } + } else { + if (s->enc_last_frame < 0) + return 0; + flush_buf = av_mallocz(160 * sizeof(*flush_buf)); + if (!flush_buf) + return AVERROR(ENOMEM); + samples = flush_buf; + s->enc_last_frame = -1; + } + + written = Encoder_Interface_Encode(s->enc_state, s->enc_mode, samples, frame, 0); av_dlog(avctx, "amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n", written, s->enc_mode, frame[0]); + av_freep(&flush_buf); return written; } @@ -249,6 +274,7 @@ AVCodec ff_libopencore_amrnb_encoder = { .init = amr_nb_encode_init, .encode = amr_nb_encode_frame, .close = amr_nb_encode_close, + .capabilities = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME, .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("OpenCORE Adaptive Multi-Rate (AMR) Narrow-Band"), .priv_class = &class, -- cgit v1.2.3 From 1ba08c94f5bb4d1c3c2d3651b5e01edb4ce172e2 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 29 Feb 2012 02:56:01 -0500 Subject: vorbisenc: add output buffer overwrite protection --- libavcodec/vorbisenc.c | 59 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 16 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 9257333c1c..1566f446ce 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -137,13 +137,16 @@ typedef struct { #define RESIDUE_PART_SIZE 32 #define NUM_RESIDUE_PARTITIONS (RESIDUE_SIZE/RESIDUE_PART_SIZE) -static inline void put_codeword(PutBitContext *pb, vorbis_enc_codebook *cb, - int entry) +static inline int put_codeword(PutBitContext *pb, vorbis_enc_codebook *cb, + int entry) { assert(entry >= 0); assert(entry < cb->nentries); assert(cb->lens[entry]); + if (pb->size_in_bits - put_bits_count(pb) < cb->lens[entry]) + return AVERROR(EINVAL); put_bits(pb, cb->lens[entry], cb->codewords[entry]); + return 0; } static int cb_lookup_vals(int lookup, int dimentions, int entries) @@ -751,14 +754,16 @@ static int render_point(int x0, int y0, int x1, int y1, int x) return y0 + (x - x0) * (y1 - y0) / (x1 - x0); } -static void floor_encode(vorbis_enc_context *venc, vorbis_enc_floor *fc, - PutBitContext *pb, uint16_t *posts, - float *floor, int samples) +static int floor_encode(vorbis_enc_context *venc, vorbis_enc_floor *fc, + PutBitContext *pb, uint16_t *posts, + float *floor, int samples) { int range = 255 / fc->multiplier + 1; int coded[MAX_FLOOR_VALUES]; // first 2 values are unused int i, counter; + if (pb->size_in_bits - put_bits_count(pb) < 1 + 2 * ilog(range - 1)) + return AVERROR(EINVAL); put_bits(pb, 1, 1); // non zero put_bits(pb, ilog(range - 1), posts[0]); put_bits(pb, ilog(range - 1), posts[1]); @@ -816,7 +821,8 @@ static void floor_encode(vorbis_enc_context *venc, vorbis_enc_floor *fc, cval |= l << cshift; cshift += c->subclass; } - put_codeword(pb, book, cval); + if (put_codeword(pb, book, cval)) + return AVERROR(EINVAL); } for (k = 0; k < c->dim; k++) { int book = c->books[cval & (csub-1)]; @@ -826,12 +832,15 @@ static void floor_encode(vorbis_enc_context *venc, vorbis_enc_floor *fc, continue; if (entry == -1) entry = 0; - put_codeword(pb, &venc->codebooks[book], entry); + if (put_codeword(pb, &venc->codebooks[book], entry)) + return AVERROR(EINVAL); } } ff_vorbis_floor1_render_list(fc->list, fc->values, posts, coded, fc->multiplier, floor, samples); + + return 0; } static float *put_vector(vorbis_enc_codebook *book, PutBitContext *pb, @@ -852,13 +861,14 @@ static float *put_vector(vorbis_enc_codebook *book, PutBitContext *pb, distance = d; } } - put_codeword(pb, book, entry); + if (put_codeword(pb, book, entry)) + return NULL; return &book->dimentions[entry * book->ndimentions]; } -static void residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, - PutBitContext *pb, float *coeffs, int samples, - int real_ch) +static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, + PutBitContext *pb, float *coeffs, int samples, + int real_ch) { int pass, i, j, p, k; int psize = rc->partition_size; @@ -894,7 +904,8 @@ static void residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, entry *= rc->classifications; entry += classes[j][p + i]; } - put_codeword(pb, book, entry); + if (put_codeword(pb, book, entry)) + return AVERROR(EINVAL); } for (i = 0; i < classwords && p < partitions; i++, p++) { for (j = 0; j < channels; j++) { @@ -909,8 +920,10 @@ static void residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, if (rc->type == 0) { for (k = 0; k < psize; k += book->ndimentions) { - float *a = put_vector(book, pb, &buf[k]); int l; + float *a = put_vector(book, pb, &buf[k]); + if (!a) + return AVERROR(EINVAL); for (l = 0; l < book->ndimentions; l++) buf[k + l] -= a[l]; } @@ -930,6 +943,8 @@ static void residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, } } pv = put_vector(book, pb, vec); + if (!pv) + return AVERROR(EINVAL); for (dim = book->ndimentions; dim--; ) { coeffs[a1 + b1] -= *pv++; if ((a1 += samples) == s) { @@ -943,6 +958,7 @@ static void residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, } } } + return 0; } static int apply_window_and_mdct(vorbis_enc_context *venc, const signed short *audio, @@ -1017,6 +1033,11 @@ static int vorbis_encode_frame(AVCodecContext *avccontext, init_put_bits(&pb, packets, buf_size); + if (pb.size_in_bits - put_bits_count(&pb) < 1 + ilog(venc->nmodes - 1)) { + av_log(avccontext, AV_LOG_ERROR, "output buffer is too small\n"); + return AVERROR(EINVAL); + } + put_bits(&pb, 1, 0); // magic bit put_bits(&pb, ilog(venc->nmodes - 1), 0); // 0 bits, the mode @@ -1032,7 +1053,10 @@ static int vorbis_encode_frame(AVCodecContext *avccontext, vorbis_enc_floor *fc = &venc->floors[mapping->floor[mapping->mux[i]]]; uint16_t posts[MAX_FLOOR_VALUES]; floor_fit(venc, fc, &venc->coeffs[i * samples], posts, samples); - floor_encode(venc, fc, &pb, posts, &venc->floor[i * samples], samples); + if (floor_encode(venc, fc, &pb, posts, &venc->floor[i * samples], samples)) { + av_log(avccontext, AV_LOG_ERROR, "output buffer is too small\n"); + return AVERROR(EINVAL); + } } for (i = 0; i < venc->channels * samples; i++) @@ -1052,8 +1076,11 @@ static int vorbis_encode_frame(AVCodecContext *avccontext, } } - residue_encode(venc, &venc->residues[mapping->residue[mapping->mux[0]]], - &pb, venc->coeffs, samples, venc->channels); + if (residue_encode(venc, &venc->residues[mapping->residue[mapping->mux[0]]], + &pb, venc->coeffs, samples, venc->channels)) { + av_log(avccontext, AV_LOG_ERROR, "output buffer is too small\n"); + return AVERROR(EINVAL); + } avccontext->coded_frame->pts = venc->sample_count; venc->sample_count += avccontext->frame_size; -- cgit v1.2.3 From 4db4b53dc8ac81f10414b48aa6d954ba2c232a92 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Sat, 3 Mar 2012 19:14:35 +0100 Subject: proresenc: give user a possibility to alter some encoding parameters This allows user to select quantisation matrix from different profile, stamp frames with custom vendor string and change target bitrate. --- libavcodec/proresenc.c | 178 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 122 insertions(+), 56 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/proresenc.c b/libavcodec/proresenc.c index 8e6f93fe2b..9f26def1df 100644 --- a/libavcodec/proresenc.c +++ b/libavcodec/proresenc.c @@ -42,6 +42,67 @@ enum { PRORES_PROFILE_HQ, }; +enum { + QUANT_MAT_PROXY = 0, + QUANT_MAT_LT, + QUANT_MAT_STANDARD, + QUANT_MAT_HQ, + QUANT_MAT_DEFAULT, +}; + +static const uint8_t prores_quant_matrices[][64] = { + { // proxy + 4, 7, 9, 11, 13, 14, 15, 63, + 7, 7, 11, 12, 14, 15, 63, 63, + 9, 11, 13, 14, 15, 63, 63, 63, + 11, 11, 13, 14, 63, 63, 63, 63, + 11, 13, 14, 63, 63, 63, 63, 63, + 13, 14, 63, 63, 63, 63, 63, 63, + 13, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, + }, + { // LT + 4, 5, 6, 7, 9, 11, 13, 15, + 5, 5, 7, 8, 11, 13, 15, 17, + 6, 7, 9, 11, 13, 15, 15, 17, + 7, 7, 9, 11, 13, 15, 17, 19, + 7, 9, 11, 13, 14, 16, 19, 23, + 9, 11, 13, 14, 16, 19, 23, 29, + 9, 11, 13, 15, 17, 21, 28, 35, + 11, 13, 16, 17, 21, 28, 35, 41, + }, + { // standard + 4, 4, 5, 5, 6, 7, 7, 9, + 4, 4, 5, 6, 7, 7, 9, 9, + 5, 5, 6, 7, 7, 9, 9, 10, + 5, 5, 6, 7, 7, 9, 9, 10, + 5, 6, 7, 7, 8, 9, 10, 12, + 6, 7, 7, 8, 9, 10, 12, 15, + 6, 7, 7, 9, 10, 11, 14, 17, + 7, 7, 9, 10, 11, 14, 17, 21, + }, + { // high quality + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 4, 5, 5, 6, + 4, 4, 4, 4, 5, 5, 6, 7, + 4, 4, 4, 4, 5, 6, 7, 7, + }, + { // codec default + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + }, +}; + #define NUM_MB_LIMITS 4 static const int prores_mb_limits[NUM_MB_LIMITS] = { 1620, // up to 720x576 @@ -56,7 +117,7 @@ static const struct prores_profile { int min_quant; int max_quant; int br_tab[NUM_MB_LIMITS]; - uint8_t quant[64]; + int quant; } prores_profile_info[4] = { { .full_name = "proxy", @@ -64,16 +125,7 @@ static const struct prores_profile { .min_quant = 4, .max_quant = 8, .br_tab = { 300, 242, 220, 194 }, - .quant = { - 4, 7, 9, 11, 13, 14, 15, 63, - 7, 7, 11, 12, 14, 15, 63, 63, - 9, 11, 13, 14, 15, 63, 63, 63, - 11, 11, 13, 14, 63, 63, 63, 63, - 11, 13, 14, 63, 63, 63, 63, 63, - 13, 14, 63, 63, 63, 63, 63, 63, - 13, 63, 63, 63, 63, 63, 63, 63, - 63, 63, 63, 63, 63, 63, 63, 63, - }, + .quant = QUANT_MAT_PROXY, }, { .full_name = "LT", @@ -81,16 +133,7 @@ static const struct prores_profile { .min_quant = 1, .max_quant = 9, .br_tab = { 720, 560, 490, 440 }, - .quant = { - 4, 5, 6, 7, 9, 11, 13, 15, - 5, 5, 7, 8, 11, 13, 15, 17, - 6, 7, 9, 11, 13, 15, 15, 17, - 7, 7, 9, 11, 13, 15, 17, 19, - 7, 9, 11, 13, 14, 16, 19, 23, - 9, 11, 13, 14, 16, 19, 23, 29, - 9, 11, 13, 15, 17, 21, 28, 35, - 11, 13, 16, 17, 21, 28, 35, 41, - }, + .quant = QUANT_MAT_LT, }, { .full_name = "standard", @@ -98,16 +141,7 @@ static const struct prores_profile { .min_quant = 1, .max_quant = 6, .br_tab = { 1050, 808, 710, 632 }, - .quant = { - 4, 4, 5, 5, 6, 7, 7, 9, - 4, 4, 5, 6, 7, 7, 9, 9, - 5, 5, 6, 7, 7, 9, 9, 10, - 5, 5, 6, 7, 7, 9, 9, 10, - 5, 6, 7, 7, 8, 9, 10, 12, - 6, 7, 7, 8, 9, 10, 12, 15, - 6, 7, 7, 9, 10, 11, 14, 17, - 7, 7, 9, 10, 11, 14, 17, 21, - }, + .quant = QUANT_MAT_STANDARD, }, { .full_name = "high quality", @@ -115,16 +149,7 @@ static const struct prores_profile { .min_quant = 1, .max_quant = 6, .br_tab = { 1566, 1216, 1070, 950 }, - .quant = { - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 5, 5, - 4, 4, 4, 4, 4, 5, 5, 6, - 4, 4, 4, 4, 5, 5, 6, 7, - 4, 4, 4, 4, 5, 6, 7, 7, - }, + .quant = QUANT_MAT_HQ, } // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 } }; @@ -147,6 +172,7 @@ typedef struct ProresContext { DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16]; int16_t quants[MAX_STORED_Q][64]; int16_t custom_q[64]; + const uint8_t *quant_mat; ProresDSPContext dsp; ScanTable scantable; @@ -159,6 +185,9 @@ typedef struct ProresContext { int num_planes; int bits_per_mb; + char *vendor; + int quant_sel; + int frame_size; int profile; @@ -373,7 +402,7 @@ static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, } else { qmat = ctx->custom_q; for (i = 0; i < 64; i++) - qmat[i] = ctx->profile_info->quant[i] * quant; + qmat[i] = ctx->quant_mat[i] * quant; } for (i = 0; i < ctx->num_planes; i++) { @@ -591,7 +620,7 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic, } else { qmat = ctx->custom_q; for (i = 0; i < 64; i++) - qmat[i] = ctx->profile_info->quant[i] * q; + qmat[i] = ctx->quant_mat[i] * q; } for (i = 0; i < ctx->num_planes; i++) { bits += estimate_slice_plane(ctx, &error, i, @@ -684,7 +713,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, tmp = buf; buf += 2; // frame header size will be stored here bytestream_put_be16 (&buf, 0); // version 1 - bytestream_put_buffer(&buf, "Lavc", 4); // creator + bytestream_put_buffer(&buf, ctx->vendor, 4); bytestream_put_be16 (&buf, avctx->width); bytestream_put_be16 (&buf, avctx->height); bytestream_put_byte (&buf, ctx->chroma_factor << 6); // frame flags @@ -694,13 +723,17 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, bytestream_put_byte (&buf, avctx->colorspace); bytestream_put_byte (&buf, 0x40); // source format and alpha information bytestream_put_byte (&buf, 0); // reserved - bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present - // luma quantisation matrix - for (i = 0; i < 64; i++) - bytestream_put_byte(&buf, ctx->profile_info->quant[i]); - // chroma quantisation matrix - for (i = 0; i < 64; i++) - bytestream_put_byte(&buf, ctx->profile_info->quant[i]); + if (ctx->quant_sel != QUANT_MAT_DEFAULT) { + bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present + // luma quantisation matrix + for (i = 0; i < 64; i++) + bytestream_put_byte(&buf, ctx->quant_mat[i]); + // chroma quantisation matrix + for (i = 0; i < 64; i++) + bytestream_put_byte(&buf, ctx->quant_mat[i]); + } else { + bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used + } bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size // picture header @@ -816,10 +849,25 @@ static av_cold int encode_init(AVCodecContext *avctx) ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps); ctx->num_slices = ctx->mb_height * ctx->slices_width; - for (i = 0; i < NUM_MB_LIMITS - 1; i++) - if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height) - break; - ctx->bits_per_mb = ctx->profile_info->br_tab[i]; + if (ctx->quant_sel == -1) + ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant]; + else + ctx->quant_mat = prores_quant_matrices[ctx->quant_sel]; + + if (strlen(ctx->vendor) != 4) { + av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n"); + return AVERROR_INVALIDDATA; + } + + if (!ctx->bits_per_mb) { + for (i = 0; i < NUM_MB_LIMITS - 1; i++) + if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height) + break; + ctx->bits_per_mb = ctx->profile_info->br_tab[i]; + } else if (ctx->bits_per_mb < 128) { + av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n"); + return AVERROR_INVALIDDATA; + } ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes + (2 * mps * ctx->bits_per_mb) / 8) @@ -829,7 +877,7 @@ static av_cold int encode_init(AVCodecContext *avctx) max_quant = ctx->profile_info->max_quant; for (i = min_quant; i < MAX_STORED_Q; i++) { for (j = 0; j < 64; j++) - ctx->quants[i][j] = ctx->profile_info->quant[j] * i; + ctx->quants[i][j] = ctx->quant_mat[j] * i; } avctx->codec_tag = ctx->profile_info->tag; @@ -877,6 +925,24 @@ static const AVOption options[] = { 0, 0, VE, "profile" }, { "hq", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ }, 0, 0, VE, "profile" }, + { "vendor", "vendor ID", OFFSET(vendor), + AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE }, + { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb), + AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE }, + { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT, + { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" }, + { "auto", NULL, 0, AV_OPT_TYPE_CONST, { -1 }, + 0, 0, VE, "quant_mat" }, + { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY }, + 0, 0, VE, "quant_mat" }, + { "lt", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT }, + 0, 0, VE, "quant_mat" }, + { "standard", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD }, + 0, 0, VE, "quant_mat" }, + { "hq", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ }, + 0, 0, VE, "quant_mat" }, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT }, + 0, 0, VE, "quant_mat" }, { NULL } }; -- cgit v1.2.3 From 02beb9826b29166b5d7c9b306ac1648abb449be0 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 27 Feb 2012 18:52:13 +0100 Subject: lavc: deprecate AVCodecContext.sub_id. In most places where it's used, it's as a pointless write-only field. Only rv10 decoder actually reads from it, but it stores some internal version info in it. There is no reason for it to be in a public field. --- libavcodec/avcodec.h | 11 ++++------- libavcodec/mpeg12.c | 4 ---- libavcodec/mpegaudiodec.c | 2 -- libavcodec/mpegaudiodecheader.c | 1 - libavcodec/mpegvideo_parser.c | 2 -- libavcodec/options.c | 2 ++ libavcodec/pthread.c | 1 - libavcodec/rv10.c | 38 +++++++++++++++++++++++--------------- libavcodec/version.h | 3 +++ 9 files changed, 32 insertions(+), 32 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 3598aaaa85..491fb16a70 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -1228,15 +1228,12 @@ typedef struct AVCodecContext { */ unsigned int stream_codec_tag; +#if FF_API_SUB_ID /** - * Some codecs need additional format info. It is stored here. - * If any muxer uses this then ALL demuxers/parsers AND encoders for the - * specific codec MUST set it correctly otherwise stream copy breaks. - * In general use of this field by muxers is not recommended. - * - encoding: Set by libavcodec. - * - decoding: Set by libavcodec. (FIXME: Is this OK?) + * @deprecated this field is unused */ - int sub_id; + attribute_deprecated int sub_id; +#endif void *priv_data; diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index a5dafbd1fc..c49343f4b2 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -1237,7 +1237,6 @@ static int mpeg_decode_postinit(AVCodecContext *avctx) * that behave like P-frames. */ avctx->has_b_frames = !s->low_delay; - assert((avctx->sub_id == 1) == (avctx->codec_id == CODEC_ID_MPEG1VIDEO)); if (avctx->codec_id == CODEC_ID_MPEG1VIDEO) { //MPEG-1 fps avctx->time_base.den = avpriv_frame_rate_tab[s->frame_rate_index].num; @@ -1382,7 +1381,6 @@ static void mpeg_decode_sequence_extension(Mpeg1Context *s1) av_dlog(s->avctx, "sequence extension\n"); s->codec_id = s->avctx->codec_id = CODEC_ID_MPEG2VIDEO; - s->avctx->sub_id = 2; /* indicates MPEG-2 found */ if (s->avctx->debug & FF_DEBUG_PICT_INFO) av_log(s->avctx, AV_LOG_DEBUG, "profile: %d, level: %d vbv buffer: %d, bitrate:%d\n", @@ -2000,7 +1998,6 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, s->frame_pred_frame_dct = 1; s->chroma_format = 1; s->codec_id = s->avctx->codec_id = CODEC_ID_MPEG1VIDEO; - avctx->sub_id = 1; /* indicates MPEG-1 */ s->out_format = FMT_MPEG1; s->swap_uv = 0; // AFAIK VCR2 does not have SEQ_HEADER if (s->flags & CODEC_FLAG_LOW_DELAY) @@ -2060,7 +2057,6 @@ static int vcr2_init_sequence(AVCodecContext *avctx) s->frame_pred_frame_dct = 1; s->chroma_format = 1; s->codec_id = s->avctx->codec_id = CODEC_ID_MPEG2VIDEO; - avctx->sub_id = 2; /* indicates MPEG-2 */ s1->save_width = s->width; s1->save_height = s->height; s1->save_progressive_seq = s->progressive_sequence; diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index d6a09c86a8..dd43beb4fe 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -1660,7 +1660,6 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr, avctx->channel_layout = s->nb_channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO; if (!avctx->bit_rate) avctx->bit_rate = s->bit_rate; - avctx->sub_id = s->layer; if (s->frame_size <= 0 || s->frame_size > buf_size) { av_log(avctx, AV_LOG_ERROR, "incomplete frame\n"); @@ -1733,7 +1732,6 @@ static int decode_frame_adu(AVCodecContext *avctx, void *data, avctx->channels = s->nb_channels; if (!avctx->bit_rate) avctx->bit_rate = s->bit_rate; - avctx->sub_id = s->layer; s->frame_size = len; diff --git a/libavcodec/mpegaudiodecheader.c b/libavcodec/mpegaudiodecheader.c index dbd67ff0e3..428137ddaf 100644 --- a/libavcodec/mpegaudiodecheader.c +++ b/libavcodec/mpegaudiodecheader.c @@ -142,6 +142,5 @@ int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_r *sample_rate = s->sample_rate; *channels = s->nb_channels; *bit_rate = s->bit_rate; - avctx->sub_id = s->layer; return s->frame_size; } diff --git a/libavcodec/mpegvideo_parser.c b/libavcodec/mpegvideo_parser.c index f0b3b202eb..af4b6e42ba 100644 --- a/libavcodec/mpegvideo_parser.c +++ b/libavcodec/mpegvideo_parser.c @@ -69,7 +69,6 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, pc->frame_rate.num = avctx->time_base.num = avpriv_frame_rate_tab[frame_rate_index].den; avctx->bit_rate = ((buf[4]<<10) | (buf[5]<<2) | (buf[6]>>6))*400; avctx->codec_id = CODEC_ID_MPEG1VIDEO; - avctx->sub_id = 1; } break; case EXT_START_CODE: @@ -94,7 +93,6 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, avctx->time_base.den = pc->frame_rate.den * (frame_rate_ext_n + 1) * 2; avctx->time_base.num = pc->frame_rate.num * (frame_rate_ext_d + 1); avctx->codec_id = CODEC_ID_MPEG2VIDEO; - avctx->sub_id = 2; /* forces MPEG2 */ } break; case 0x8: /* picture coding extension */ diff --git a/libavcodec/options.c b/libavcodec/options.c index d25b64aee8..51187b842c 100644 --- a/libavcodec/options.c +++ b/libavcodec/options.c @@ -109,7 +109,9 @@ static const AVOption options[]={ #endif {"noout", "skip bitstream encoding", 0, AV_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"}, {"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"}, +#if FF_API_SUB_ID {"sub_id", NULL, OFFSET(sub_id), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX}, +#endif {"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.dbl = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"}, {"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.dbl = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" }, {"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.dbl = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" }, diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c index f17f34e8e1..4a02823dde 100644 --- a/libavcodec/pthread.c +++ b/libavcodec/pthread.c @@ -402,7 +402,6 @@ static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, int err = 0; if (dst != src) { - dst->sub_id = src->sub_id; dst->time_base = src->time_base; dst->width = src->width; dst->height = src->height; diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c index 2b1a09dc69..577522504c 100644 --- a/libavcodec/rv10.c +++ b/libavcodec/rv10.c @@ -40,6 +40,11 @@ #define DC_VLC_BITS 14 //FIXME find a better solution +typedef struct RVDecContext { + MpegEncContext m; + int sub_id; +} RVDecContext; + static const uint16_t rv_lum_code[256] = { 0x3e7f, 0x0f00, 0x0f01, 0x0f02, 0x0f03, 0x0f04, 0x0f05, 0x0f06, @@ -293,8 +298,9 @@ static int rv10_decode_picture_header(MpegEncContext *s) return mb_count; } -static int rv20_decode_picture_header(MpegEncContext *s) +static int rv20_decode_picture_header(RVDecContext *rv) { + MpegEncContext *s = &rv->m; int seq, mb_pos, i; int rpr_bits; @@ -325,10 +331,10 @@ static int rv20_decode_picture_header(MpegEncContext *s) return -1; } - if(RV_GET_MINOR_VER(s->avctx->sub_id) >= 2) + if(RV_GET_MINOR_VER(rv->sub_id) >= 2) s->loop_filter = get_bits1(&s->gb); - if(RV_GET_MINOR_VER(s->avctx->sub_id) <= 1) + if(RV_GET_MINOR_VER(rv->sub_id) <= 1) seq = get_bits(&s->gb, 8) << 7; else seq = get_bits(&s->gb, 13) << 2; @@ -393,7 +399,7 @@ static int rv20_decode_picture_header(MpegEncContext *s) av_log(s->avctx, AV_LOG_DEBUG, "\n");*/ s->no_rounding= get_bits1(&s->gb); - if(RV_GET_MINOR_VER(s->avctx->sub_id) <= 1 && s->pict_type == AV_PICTURE_TYPE_B) + if(RV_GET_MINOR_VER(rv->sub_id) <= 1 && s->pict_type == AV_PICTURE_TYPE_B) skip_bits(&s->gb, 5); // binary decoder reads 3+2 bits here but they don't seem to be used s->f_code = 1; @@ -418,7 +424,8 @@ av_log(s->avctx, AV_LOG_DEBUG, "\n");*/ static av_cold int rv10_decode_init(AVCodecContext *avctx) { - MpegEncContext *s = avctx->priv_data; + RVDecContext *rv = avctx->priv_data; + MpegEncContext *s = &rv->m; static int done=0; int major_ver, minor_ver, micro_ver; @@ -438,11 +445,11 @@ static av_cold int rv10_decode_init(AVCodecContext *avctx) s->orig_height= s->height = avctx->coded_height; s->h263_long_vectors= ((uint8_t*)avctx->extradata)[3] & 1; - avctx->sub_id= AV_RB32((uint8_t*)avctx->extradata + 4); + rv->sub_id = AV_RB32((uint8_t*)avctx->extradata + 4); - major_ver = RV_GET_MAJOR_VER(avctx->sub_id); - minor_ver = RV_GET_MINOR_VER(avctx->sub_id); - micro_ver = RV_GET_MICRO_VER(avctx->sub_id); + major_ver = RV_GET_MAJOR_VER(rv->sub_id); + minor_ver = RV_GET_MINOR_VER(rv->sub_id); + micro_ver = RV_GET_MICRO_VER(rv->sub_id); s->low_delay = 1; switch (major_ver) { @@ -457,13 +464,13 @@ static av_cold int rv10_decode_init(AVCodecContext *avctx) } break; default: - av_log(s->avctx, AV_LOG_ERROR, "unknown header %X\n", avctx->sub_id); + av_log(s->avctx, AV_LOG_ERROR, "unknown header %X\n", rv->sub_id); av_log_missing_feature(avctx, "RV1/2 version", 1); return AVERROR_PATCHWELCOME; } if(avctx->debug & FF_DEBUG_PICT_INFO){ - av_log(avctx, AV_LOG_DEBUG, "ver:%X ver0:%X\n", avctx->sub_id, avctx->extradata_size >= 4 ? ((uint32_t*)avctx->extradata)[0] : -1); + av_log(avctx, AV_LOG_DEBUG, "ver:%X ver0:%X\n", rv->sub_id, avctx->extradata_size >= 4 ? ((uint32_t*)avctx->extradata)[0] : -1); } avctx->pix_fmt = PIX_FMT_YUV420P; @@ -498,7 +505,8 @@ static av_cold int rv10_decode_end(AVCodecContext *avctx) static int rv10_decode_packet(AVCodecContext *avctx, const uint8_t *buf, int buf_size, int buf_size2) { - MpegEncContext *s = avctx->priv_data; + RVDecContext *rv = avctx->priv_data; + MpegEncContext *s = &rv->m; int mb_count, mb_pos, left, start_mb_x, active_bits_size; active_bits_size = buf_size * 8; @@ -506,7 +514,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, if(s->codec_id ==CODEC_ID_RV10) mb_count = rv10_decode_picture_header(s); else - mb_count = rv20_decode_picture_header(s); + mb_count = rv20_decode_picture_header(rv); if (mb_count < 0) { av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n"); return -1; @@ -714,7 +722,7 @@ AVCodec ff_rv10_decoder = { .name = "rv10", .type = AVMEDIA_TYPE_VIDEO, .id = CODEC_ID_RV10, - .priv_data_size = sizeof(MpegEncContext), + .priv_data_size = sizeof(RVDecContext), .init = rv10_decode_init, .close = rv10_decode_end, .decode = rv10_decode_frame, @@ -728,7 +736,7 @@ AVCodec ff_rv20_decoder = { .name = "rv20", .type = AVMEDIA_TYPE_VIDEO, .id = CODEC_ID_RV20, - .priv_data_size = sizeof(MpegEncContext), + .priv_data_size = sizeof(RVDecContext), .init = rv10_decode_init, .close = rv10_decode_end, .decode = rv10_decode_frame, diff --git a/libavcodec/version.h b/libavcodec/version.h index f3b40ca511..6fbe01c3a8 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -59,5 +59,8 @@ #ifndef FF_API_INTER_THRESHOLD #define FF_API_INTER_THRESHOLD (LIBAVCODEC_VERSION_MAJOR < 55) #endif +#ifndef FF_API_SUB_ID +#define FF_API_SUB_ID (LIBAVCODEC_VERSION_MAJOR < 55) +#endif #endif /* AVCODEC_VERSION_H */ -- cgit v1.2.3 From 44fe77b350fd812c84cd866b7d03e436acc3bab2 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 2 Mar 2012 17:00:53 +0100 Subject: lavc: make codec_is_decoder/encoder() public. --- doc/APIchanges | 3 +++ libavcodec/avcodec.h | 10 ++++++++++ libavcodec/utils.c | 18 +++++++++--------- libavcodec/version.h | 2 +- 4 files changed, 23 insertions(+), 10 deletions(-) (limited to 'libavcodec') diff --git a/doc/APIchanges b/doc/APIchanges index 00cee5ad90..ef630e8e2a 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -12,6 +12,9 @@ libavutil: 2011-04-18 API changes, most recent first: +2012-03-xx - xxxxxxx - lavc 54.7.0 - avcodec.h + Add av_codec_is_encoder/decoder(). + 2012-xx-xx - xxxxxxx - lavc 54.3.0 - avcodec.h Add av_packet_shrink_side_data. diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 491fb16a70..a99dcbd553 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -4297,4 +4297,14 @@ const AVClass *avcodec_get_class(void); */ int avcodec_is_open(AVCodecContext *s); +/** + * @return a non-zero number if codec is an encoder, zero otherwise + */ +int av_codec_is_encoder(AVCodec *codec); + +/** + * @return a non-zero number if codec is a decoder, zero otherwise + */ +int av_codec_is_decoder(AVCodec *codec); + #endif /* AVCODEC_AVCODEC_H */ diff --git a/libavcodec/utils.c b/libavcodec/utils.c index f9927a1383..d4384108df 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -112,12 +112,12 @@ static void avcodec_init(void) ff_dsputil_static_init(); } -static av_always_inline int codec_is_encoder(AVCodec *codec) +int av_codec_is_encoder(AVCodec *codec) { return codec && (codec->encode || codec->encode2); } -static av_always_inline int codec_is_decoder(AVCodec *codec) +int av_codec_is_decoder(AVCodec *codec) { return codec && codec->decode; } @@ -704,7 +704,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD /* if the decoder init function was already called previously, free the already allocated subtitle_header before overwriting it */ - if (codec_is_decoder(codec)) + if (av_codec_is_decoder(codec)) av_freep(&avctx->subtitle_header); #define SANE_NB_CHANNELS 128U @@ -748,7 +748,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD ret = AVERROR(EINVAL); goto free_and_end; } - if (codec_is_encoder(avctx->codec)) { + if (av_codec_is_encoder(avctx->codec)) { int i; if (avctx->codec->sample_fmts) { for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++) @@ -1367,7 +1367,7 @@ av_cold int avcodec_close(AVCodecContext *avctx) av_opt_free(avctx->priv_data); av_opt_free(avctx); av_freep(&avctx->priv_data); - if (codec_is_encoder(avctx->codec)) + if (av_codec_is_encoder(avctx->codec)) av_freep(&avctx->extradata); avctx->codec = NULL; avctx->active_thread_type = 0; @@ -1385,7 +1385,7 @@ AVCodec *avcodec_find_encoder(enum CodecID id) AVCodec *p, *experimental=NULL; p = first_avcodec; while (p) { - if (codec_is_encoder(p) && p->id == id) { + if (av_codec_is_encoder(p) && p->id == id) { if (p->capabilities & CODEC_CAP_EXPERIMENTAL && !experimental) { experimental = p; } else @@ -1403,7 +1403,7 @@ AVCodec *avcodec_find_encoder_by_name(const char *name) return NULL; p = first_avcodec; while (p) { - if (codec_is_encoder(p) && strcmp(name,p->name) == 0) + if (av_codec_is_encoder(p) && strcmp(name,p->name) == 0) return p; p = p->next; } @@ -1415,7 +1415,7 @@ AVCodec *avcodec_find_decoder(enum CodecID id) AVCodec *p; p = first_avcodec; while (p) { - if (codec_is_decoder(p) && p->id == id) + if (av_codec_is_decoder(p) && p->id == id) return p; p = p->next; } @@ -1429,7 +1429,7 @@ AVCodec *avcodec_find_decoder_by_name(const char *name) return NULL; p = first_avcodec; while (p) { - if (codec_is_decoder(p) && strcmp(name,p->name) == 0) + if (av_codec_is_decoder(p) && strcmp(name,p->name) == 0) return p; p = p->next; } diff --git a/libavcodec/version.h b/libavcodec/version.h index 6fbe01c3a8..1ecfb215f6 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -21,7 +21,7 @@ #define AVCODEC_VERSION_H #define LIBAVCODEC_VERSION_MAJOR 54 -#define LIBAVCODEC_VERSION_MINOR 6 +#define LIBAVCODEC_VERSION_MINOR 7 #define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ -- cgit v1.2.3