diff options
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/arm/vc1dsp_init_neon.c | 105 | ||||
-rw-r--r-- | libavcodec/vc1dec.c | 16 | ||||
-rw-r--r-- | libavcodec/vc1dsp.c | 141 | ||||
-rw-r--r-- | libavcodec/vc1dsp.h | 4 | ||||
-rw-r--r-- | libavcodec/x86/vc1dsp_init.c | 22 | ||||
-rw-r--r-- | libavcodec/x86/vc1dsp_mmx.c | 106 |
6 files changed, 259 insertions, 135 deletions
diff --git a/libavcodec/arm/vc1dsp_init_neon.c b/libavcodec/arm/vc1dsp_init_neon.c index f562344b85..47ae34b056 100644 --- a/libavcodec/arm/vc1dsp_init_neon.c +++ b/libavcodec/arm/vc1dsp_init_neon.c @@ -35,40 +35,38 @@ void ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, int linesize, int16_t *block); void ff_put_pixels8x8_neon(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd); -void ff_put_vc1_mspel_mc10_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc20_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc30_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); - -void ff_put_vc1_mspel_mc01_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc02_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc03_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); - -void ff_put_vc1_mspel_mc11_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc12_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc13_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); - -void ff_put_vc1_mspel_mc21_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc22_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc23_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); - -void ff_put_vc1_mspel_mc31_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc32_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); -void ff_put_vc1_mspel_mc33_neon(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd); +#define DECL_PUT(X, Y) \ +void ff_put_vc1_mspel_mc##X##Y##_neon(uint8_t *dst, const uint8_t *src, \ + ptrdiff_t stride, int rnd); \ +void 
ff_put_vc1_mspel_mc##X##Y##_16_neon(uint8_t *dst, const uint8_t *src, \ + ptrdiff_t stride, int rnd) \ +{ \ + ff_put_vc1_mspel_mc##X##Y##_neon(dst+0, src+0, stride, rnd); \ + ff_put_vc1_mspel_mc##X##Y##_neon(dst+8, src+8, stride, rnd); \ + dst += 8*stride; src += 8*stride; \ + ff_put_vc1_mspel_mc##X##Y##_neon(dst+0, src+0, stride, rnd); \ + ff_put_vc1_mspel_mc##X##Y##_neon(dst+8, src+8, stride, rnd); \ +} + +DECL_PUT(1, 0) +DECL_PUT(2, 0) +DECL_PUT(3, 0) + +DECL_PUT(0, 1) +DECL_PUT(0, 2) +DECL_PUT(0, 3) + +DECL_PUT(1, 1) +DECL_PUT(1, 2) +DECL_PUT(1, 3) + +DECL_PUT(2, 1) +DECL_PUT(2, 2) +DECL_PUT(2, 3) + +DECL_PUT(3, 1) +DECL_PUT(3, 2) +DECL_PUT(3, 3) void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); @@ -79,6 +77,10 @@ void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); +#define FN_ASSIGN(X, Y) \ + dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = ff_put_vc1_mspel_mc##X##Y##_16_neon; \ + dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = ff_put_vc1_mspel_mc##X##Y##_neon + av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp) { dsp->vc1_inv_trans_8x8 = ff_vc1_inv_trans_8x8_neon; @@ -90,22 +92,25 @@ av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp) dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_neon; dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_neon; - dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_pixels8x8_neon; - dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_neon; - dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_neon; - dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_neon; - dsp->put_vc1_mspel_pixels_tab[ 4] = ff_put_vc1_mspel_mc01_neon; - dsp->put_vc1_mspel_pixels_tab[ 5] = ff_put_vc1_mspel_mc11_neon; - dsp->put_vc1_mspel_pixels_tab[ 6] = ff_put_vc1_mspel_mc21_neon; - dsp->put_vc1_mspel_pixels_tab[ 7] = ff_put_vc1_mspel_mc31_neon; - dsp->put_vc1_mspel_pixels_tab[ 8] = 
ff_put_vc1_mspel_mc02_neon; - dsp->put_vc1_mspel_pixels_tab[ 9] = ff_put_vc1_mspel_mc12_neon; - dsp->put_vc1_mspel_pixels_tab[10] = ff_put_vc1_mspel_mc22_neon; - dsp->put_vc1_mspel_pixels_tab[11] = ff_put_vc1_mspel_mc32_neon; - dsp->put_vc1_mspel_pixels_tab[12] = ff_put_vc1_mspel_mc03_neon; - dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_neon; - dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_neon; - dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_neon; + dsp->put_vc1_mspel_pixels_tab[0][ 0] = ff_put_pixels16x16_neon; + dsp->put_vc1_mspel_pixels_tab[1][ 0] = ff_put_pixels8x8_neon; + FN_ASSIGN(2, 0); + FN_ASSIGN(3, 0); + + FN_ASSIGN(0, 1); + FN_ASSIGN(1, 1); + FN_ASSIGN(2, 1); + FN_ASSIGN(3, 1); + + FN_ASSIGN(0, 2); + FN_ASSIGN(1, 2); + FN_ASSIGN(2, 2); + FN_ASSIGN(3, 2); + + FN_ASSIGN(0, 3); + FN_ASSIGN(1, 3); + FN_ASSIGN(2, 3); + FN_ASSIGN(3, 3); dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon; dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon; diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 1a45e40d88..d5e8a37d78 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -510,11 +510,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir) if (s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd); - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); - srcY += s->linesize * 8; - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd); - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0] , srcY , s->linesize, v->rnd); } else { // hpel mc - always used for luma dxy = (my & 2) | ((mx & 2) >> 1); if (!v->rnd) @@ -728,9 +724,9 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg) if (s->mspel) { dxy = ((my & 
3) << 2) | (mx & 3); if (avg) - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); else - v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd); } else { // hpel mc - always used for luma dxy = (my & 2) | ((mx & 2) >> 1); if (!v->rnd) @@ -2039,11 +2035,7 @@ static void vc1_interp_mc(VC1Context *v) if (s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off , srcY , s->linesize, v->rnd); - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8, srcY + 8, s->linesize, v->rnd); - srcY += s->linesize * 8; - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize , srcY , s->linesize, v->rnd); - v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0] + off , srcY , s->linesize, v->rnd); } else { // hpel mc dxy = (my & 2) | ((mx & 2) >> 1); diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index c37b9560ae..acc3aea42b 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -643,6 +643,64 @@ static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, \ src += stride; \ } \ }\ +static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst, \ + const uint8_t *src, \ + ptrdiff_t stride, \ + int hmode, \ + int vmode, \ + int rnd) \ +{ \ + int i, j; \ + \ + if (vmode) { /* Vertical filter to apply */ \ + int r; \ + \ + if (hmode) { /* Horizontal filter to apply too, vertical pass output to tmp */ \ + static const int shift_value[] = { 0, 5, 1, 5 }; \ + int shift = (shift_value[hmode] + shift_value[vmode]) >> 1; \ + int16_t tmp[19 * 16], *tptr = tmp; \ + \ + r = (1 << (shift - 1)) + rnd - 1; \ + \ + src 
-= 1; \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 19; i++) \ + tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \ + src += stride; \ + tptr += 19; \ + } \ + \ + r = 64 - rnd; \ + tptr = tmp + 1; \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 16; i++) \ + OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \ + dst += stride; \ + tptr += 19; \ + } \ + \ + return; \ + } else { /* No horizontal filter, output 16 lines to dst */ \ + r = 1 - rnd; \ + \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 16; i++) \ + OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r)); \ + src += stride; \ + dst += stride; \ + } \ + return; \ + } \ + } \ + \ + /* Horizontal mode with no vertical mode */ \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 16; i++) \ + OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd)); \ + dst += stride; \ + src += stride; \ + } \ +}\ static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\ int i;\ for(i=0; i<8; i++){\ @@ -651,6 +709,17 @@ static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff pixels+=line_size;\ block +=line_size;\ }\ +}\ +static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\ + int i;\ + for(i=0; i<16; i++){\ + OP4(*(uint32_t*)(block ), AV_RN32(pixels ));\ + OP4(*(uint32_t*)(block+ 4), AV_RN32(pixels+ 4));\ + OP4(*(uint32_t*)(block+ 8), AV_RN32(pixels+ 8));\ + OP4(*(uint32_t*)(block+12), AV_RN32(pixels+12));\ + pixels+=line_size;\ + block +=line_size;\ + }\ } #define op_put(a, b) a = av_clip_uint8(b) @@ -675,6 +744,18 @@ static void avg_vc1_mspel_mc ## a ## b ## _c(uint8_t *dst, \ ptrdiff_t stride, int rnd) \ { \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ +} \ +static void put_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst, \ + const uint8_t *src, \ + ptrdiff_t stride, int rnd) \ +{ \ + put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ +} \ +static 
void avg_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst, \ + const uint8_t *src, \ + ptrdiff_t stride, int rnd) \ +{ \ + avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ } PUT_VC1_MSPEL(1, 0) @@ -878,6 +959,11 @@ static void sprite_v_double_twoscale_c(uint8_t *dst, } #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ +#define FN_ASSIGN(X, Y) \ + dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = put_vc1_mspel_mc##X##Y##_c; \ + dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = put_vc1_mspel_mc##X##Y##_16_c; \ + dsp->avg_vc1_mspel_pixels_tab[1][X+4*Y] = avg_vc1_mspel_mc##X##Y##_c; \ + dsp->avg_vc1_mspel_pixels_tab[0][X+4*Y] = avg_vc1_mspel_mc##X##Y##_16_c av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) { @@ -902,39 +988,28 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c; dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c; - dsp->put_vc1_mspel_pixels_tab[0] = put_pixels8x8_c; - dsp->put_vc1_mspel_pixels_tab[1] = put_vc1_mspel_mc10_c; - dsp->put_vc1_mspel_pixels_tab[2] = put_vc1_mspel_mc20_c; - dsp->put_vc1_mspel_pixels_tab[3] = put_vc1_mspel_mc30_c; - dsp->put_vc1_mspel_pixels_tab[4] = put_vc1_mspel_mc01_c; - dsp->put_vc1_mspel_pixels_tab[5] = put_vc1_mspel_mc11_c; - dsp->put_vc1_mspel_pixels_tab[6] = put_vc1_mspel_mc21_c; - dsp->put_vc1_mspel_pixels_tab[7] = put_vc1_mspel_mc31_c; - dsp->put_vc1_mspel_pixels_tab[8] = put_vc1_mspel_mc02_c; - dsp->put_vc1_mspel_pixels_tab[9] = put_vc1_mspel_mc12_c; - dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c; - dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c; - dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c; - dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c; - dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c; - dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c; - - dsp->avg_vc1_mspel_pixels_tab[0] = avg_pixels8x8_c; - dsp->avg_vc1_mspel_pixels_tab[1] = avg_vc1_mspel_mc10_c; - dsp->avg_vc1_mspel_pixels_tab[2] = avg_vc1_mspel_mc20_c; 
- dsp->avg_vc1_mspel_pixels_tab[3] = avg_vc1_mspel_mc30_c; - dsp->avg_vc1_mspel_pixels_tab[4] = avg_vc1_mspel_mc01_c; - dsp->avg_vc1_mspel_pixels_tab[5] = avg_vc1_mspel_mc11_c; - dsp->avg_vc1_mspel_pixels_tab[6] = avg_vc1_mspel_mc21_c; - dsp->avg_vc1_mspel_pixels_tab[7] = avg_vc1_mspel_mc31_c; - dsp->avg_vc1_mspel_pixels_tab[8] = avg_vc1_mspel_mc02_c; - dsp->avg_vc1_mspel_pixels_tab[9] = avg_vc1_mspel_mc12_c; - dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c; - dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c; - dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c; - dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c; - dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c; - dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c; + dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c; + dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c; + dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c; + dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c; + FN_ASSIGN(0, 1); + FN_ASSIGN(0, 2); + FN_ASSIGN(0, 3); + + FN_ASSIGN(1, 0); + FN_ASSIGN(1, 1); + FN_ASSIGN(1, 2); + FN_ASSIGN(1, 3); + + FN_ASSIGN(2, 0); + FN_ASSIGN(2, 1); + FN_ASSIGN(2, 2); + FN_ASSIGN(2, 3); + + FN_ASSIGN(3, 0); + FN_ASSIGN(3, 1); + FN_ASSIGN(3, 2); + FN_ASSIGN(3, 3); dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c; dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c; diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h index 1aef32a306..7cf613597c 100644 --- a/libavcodec/vc1dsp.h +++ b/libavcodec/vc1dsp.h @@ -57,8 +57,8 @@ typedef struct VC1DSPContext { /* put 8x8 block with bicubic interpolation and quarterpel precision * last argument is actually round value instead of height */ - vc1op_pixels_func put_vc1_mspel_pixels_tab[16]; - vc1op_pixels_func avg_vc1_mspel_pixels_tab[16]; + vc1op_pixels_func put_vc1_mspel_pixels_tab[2][16]; + vc1op_pixels_func avg_vc1_mspel_pixels_tab[2][16]; /* This is really one func used 
in VC-1 decoding */ h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]; diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c index 9256be3816..d81c28451e 100644 --- a/libavcodec/x86/vc1dsp_init.c +++ b/libavcodec/x86/vc1dsp_init.c @@ -64,11 +64,28 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq) ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq); } +static void avg_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride, int rnd) +{ + ff_avg_pixels8_mmx(dst, src, stride, 8); +} static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd) { ff_avg_pixels8_mmxext(dst, src, stride, 8); } + +static void avg_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride, int rnd) +{ + ff_avg_pixels16_mmx(dst, src, stride, 16); +} +static void avg_vc1_mspel_mc00_16_sse2(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride, int rnd) +{ + ff_avg_pixels16_sse2(dst, src, stride, 16); +} + #endif /* HAVE_YASM */ void ff_put_vc1_chroma_mc8_nornd_mmx (uint8_t *dst, uint8_t *src, @@ -104,6 +121,8 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) #if HAVE_YASM if (EXTERNAL_MMX(cpu_flags)) { dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx; + dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_mmx; + dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_mmx; } if (EXTERNAL_AMD3DNOW(cpu_flags)) { dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow; @@ -112,13 +131,14 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) ASSIGN_LF(mmxext); dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext; - dsp->avg_vc1_mspel_pixels_tab[0] = avg_vc1_mspel_mc00_mmxext; + dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_mmxext; } if (EXTERNAL_SSE2(cpu_flags)) { dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2; dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2; 
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2; dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2; + dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { ASSIGN_LF(ssse3); diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index c06cb14496..d9108fc832 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -462,6 +462,15 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ \ /* Horizontal mode with no vertical mode */\ vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);\ +} \ +static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \ + int stride, int hmode, int vmode, int rnd)\ +{ \ + OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ + OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ + dst += 8*stride; src += 8*stride; \ + OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ + OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ } VC1_MSPEL_MC(put_) @@ -482,6 +491,20 @@ static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \ int rnd) \ { \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ +}\ +static void put_vc1_mspel_mc ## a ## b ## _16_mmx(uint8_t *dst, \ + const uint8_t *src, \ + ptrdiff_t stride, \ + int rnd) \ +{ \ + put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ +}\ +static void avg_vc1_mspel_mc ## a ## b ## _16_mmxext(uint8_t *dst, \ + const uint8_t *src,\ + ptrdiff_t stride, \ + int rnd) \ +{ \ + avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ } DECLARE_FUNCTION(0, 1) @@ -710,50 +733,59 @@ static void put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, { ff_put_pixels8_mmx(dst, src, stride, 8); } +static void put_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride, int rnd) +{ + ff_put_pixels16_mmx(dst, src, stride, 16); +} + +#define FN_ASSIGN(OP, X, Y, INSN) \ + dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = 
OP##vc1_mspel_mc##X##Y##INSN; \ + dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) { - dsp->put_vc1_mspel_pixels_tab[ 0] = put_vc1_mspel_mc00_mmx; - dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx; - dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx; - dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx; - - dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx; - dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx; - dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx; - dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx; - - dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx; - dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx; - dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx; - dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx; - - dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx; - dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx; - dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx; - dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx; + FN_ASSIGN(put_, 0, 0, _mmx); + FN_ASSIGN(put_, 0, 1, _mmx); + FN_ASSIGN(put_, 0, 2, _mmx); + FN_ASSIGN(put_, 0, 3, _mmx); + + FN_ASSIGN(put_, 1, 0, _mmx); + FN_ASSIGN(put_, 1, 1, _mmx); + FN_ASSIGN(put_, 1, 2, _mmx); + FN_ASSIGN(put_, 1, 3, _mmx); + + FN_ASSIGN(put_, 2, 0, _mmx); + FN_ASSIGN(put_, 2, 1, _mmx); + FN_ASSIGN(put_, 2, 2, _mmx); + FN_ASSIGN(put_, 2, 3, _mmx); + + FN_ASSIGN(put_, 3, 0, _mmx); + FN_ASSIGN(put_, 3, 1, _mmx); + FN_ASSIGN(put_, 3, 2, _mmx); + FN_ASSIGN(put_, 3, 3, _mmx); } av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp) { - dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext; - dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext; - dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext; - - dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmxext; - 
dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmxext; - dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmxext; - dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmxext; - - dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmxext; - dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmxext; - dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmxext; - dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmxext; - - dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmxext; - dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmxext; - dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmxext; - dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmxext; + FN_ASSIGN(avg_, 0, 1, _mmxext); + FN_ASSIGN(avg_, 0, 2, _mmxext); + FN_ASSIGN(avg_, 0, 3, _mmxext); + + FN_ASSIGN(avg_, 1, 0, _mmxext); + FN_ASSIGN(avg_, 1, 1, _mmxext); + FN_ASSIGN(avg_, 1, 2, _mmxext); + FN_ASSIGN(avg_, 1, 3, _mmxext); + + FN_ASSIGN(avg_, 2, 0, _mmxext); + FN_ASSIGN(avg_, 2, 1, _mmxext); + FN_ASSIGN(avg_, 2, 2, _mmxext); + FN_ASSIGN(avg_, 2, 3, _mmxext); + + FN_ASSIGN(avg_, 3, 0, _mmxext); + FN_ASSIGN(avg_, 3, 1, _mmxext); + FN_ASSIGN(avg_, 3, 2, _mmxext); + FN_ASSIGN(avg_, 3, 3, _mmxext); dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext; dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext; |