diff options
author | Mans Rullgard <mans@mansr.com> | 2013-01-23 20:24:53 -0800 |
---|---|---|
committer | Luca Barbato <lu_zero@gentoo.org> | 2013-01-24 10:44:43 +0100 |
commit | e9d817351b28e62868528476971afe4cde1fa280 (patch) | |
tree | f17f5c43d1ce5856a275dd4b40a970d15b7a9eac /libavcodec/x86 | |
parent | 7a95afe433b2a692f490b98948c082e62ffc1d27 (diff) |
dsputil: Separate h264 qpel
The sh4 optimizations are removed because that code is
100% identical to the C code and is therefore unlikely to
provide any real practical benefit.
Signed-off-by: Diego Biurrun <diego@biurrun.de>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/Makefile | 1 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 93 | ||||
-rw-r--r-- | libavcodec/x86/h264_qpel.c | 126 |
3 files changed, 131 insertions, 89 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 0bade86375..9b8b6531d0 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -7,6 +7,7 @@ OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o OBJS-$(CONFIG_FFT) += x86/fft_init.o OBJS-$(CONFIG_H264DSP) += x86/h264dsp_init.o OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o +OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o OBJS-$(CONFIG_LPC) += x86/lpc.o OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodec.o diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 2521b9f1cc..f9da04f6ea 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1690,7 +1690,10 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, } #endif /* HAVE_INLINE_ASM */ -#include "h264_qpel.c" +void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, + int line_size, int h); +void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, + int line_size, int h); void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); @@ -1882,22 +1885,6 @@ void ff_vector_clip_int32_sse4 (int32_t *dst, const int32_t *src, c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \ } while (0) -#define H264_QPEL_FUNCS(x, y, CPU) \ - do { \ - c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \ - c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \ - c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \ - c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \ - } while (0) - -#define H264_QPEL_FUNCS_10(x, y, CPU) \ - do { \ - c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ - c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ - c->avg_h264_qpel_pixels_tab[0][x + y * 4] = 
ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ - c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ - } while (0) - static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) { const int high_bit_depth = avctx->bits_per_raw_sample > 8; @@ -2014,26 +2001,6 @@ static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_INLINE_ASM */ #if HAVE_MMXEXT_EXTERNAL - if (CONFIG_H264QPEL) { - if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); - } else if (bit_depth == 10) { -#if !ARCH_X86_64 - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); -#endif - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); - } - } - if (!high_bit_depth && CONFIG_H264CHROMA) { c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext; c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext; @@ -2148,36 +2115,10 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, c->put_pixels_tab[0][0] = ff_put_pixels16_sse2; c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2; c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2; - if (CONFIG_H264QPEL) - H264_QPEL_FUNCS(0, 0, sse2); } } - if (!high_bit_depth && CONFIG_H264QPEL) { - H264_QPEL_FUNCS(0, 1, sse2); - H264_QPEL_FUNCS(0, 2, sse2); - H264_QPEL_FUNCS(0, 3, sse2); - H264_QPEL_FUNCS(1, 1, sse2); - H264_QPEL_FUNCS(1, 2, sse2); - H264_QPEL_FUNCS(1, 3, sse2); - H264_QPEL_FUNCS(2, 1, sse2); - H264_QPEL_FUNCS(2, 2, sse2); - H264_QPEL_FUNCS(2, 3, sse2); 
- H264_QPEL_FUNCS(3, 1, sse2); - H264_QPEL_FUNCS(3, 2, sse2); - H264_QPEL_FUNCS(3, 3, sse2); - } - if (bit_depth == 10) { - if (CONFIG_H264QPEL) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); - H264_QPEL_FUNCS_10(1, 0, sse2_cache64); - H264_QPEL_FUNCS_10(2, 0, sse2_cache64); - H264_QPEL_FUNCS_10(3, 0, sse2_cache64); - } if (CONFIG_H264CHROMA) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; @@ -2205,27 +2146,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, { #if HAVE_SSSE3_EXTERNAL const int high_bit_depth = avctx->bits_per_raw_sample > 8; - const int bit_depth = avctx->bits_per_raw_sample; - if (!high_bit_depth && CONFIG_H264QPEL) { - H264_QPEL_FUNCS(1, 0, ssse3); - H264_QPEL_FUNCS(1, 1, ssse3); - H264_QPEL_FUNCS(1, 2, ssse3); - H264_QPEL_FUNCS(1, 3, ssse3); - H264_QPEL_FUNCS(2, 0, ssse3); - H264_QPEL_FUNCS(2, 1, ssse3); - H264_QPEL_FUNCS(2, 2, ssse3); - H264_QPEL_FUNCS(2, 3, ssse3); - H264_QPEL_FUNCS(3, 0, ssse3); - H264_QPEL_FUNCS(3, 1, ssse3); - H264_QPEL_FUNCS(3, 2, ssse3); - H264_QPEL_FUNCS(3, 3, ssse3); - } - if (bit_depth == 10 && CONFIG_H264QPEL) { - H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); - H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); - H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); - } if (!high_bit_depth && CONFIG_H264CHROMA) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3; c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3; @@ -2262,12 +2183,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) if (bit_depth == 10) { // AVX implies !cache64. // TODO: Port cache(32|64) detection from x264. 
- if (CONFIG_H264QPEL) { - H264_QPEL_FUNCS_10(1, 0, sse2); - H264_QPEL_FUNCS_10(2, 0, sse2); - H264_QPEL_FUNCS_10(3, 0, sse2); - } - if (CONFIG_H264CHROMA) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index bc56d091e1..bebf5a5f3d 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -22,6 +22,7 @@ #include "libavutil/cpu.h" #include "libavutil/x86/asm.h" #include "libavcodec/dsputil.h" +#include "libavcodec/h264qpel.h" #include "libavcodec/mpegvideo.h" #include "dsputil_mmx.h" @@ -490,3 +491,128 @@ QPEL16(mmxext) #endif #endif /* HAVE_YASM */ + +#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ + do { \ + c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX 
## SIZE ## _mc33_ ## CPU; \ + } while (0) + +#define H264_QPEL_FUNCS(x, y, CPU) \ + do { \ + c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \ + c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \ + c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \ + c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \ + } while (0) + +#define H264_QPEL_FUNCS_10(x, y, CPU) \ + do { \ + c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ + c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ + c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ + c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ + } while (0) + +void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) +{ + int high_bit_depth = bit_depth > 8; + int mm_flags = av_get_cpu_flags(); + +#if HAVE_MMXEXT_EXTERNAL + if (!high_bit_depth) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); + } else if (bit_depth == 10) { +#if !ARCH_X86_64 + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); +#endif + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); + } +#endif + +#if HAVE_SSE2_EXTERNAL + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { + // these functions are slower than mmx on AMD, but faster on Intel + H264_QPEL_FUNCS(0, 0, sse2); + } + 
+ if (!high_bit_depth) { + H264_QPEL_FUNCS(0, 1, sse2); + H264_QPEL_FUNCS(0, 2, sse2); + H264_QPEL_FUNCS(0, 3, sse2); + H264_QPEL_FUNCS(1, 1, sse2); + H264_QPEL_FUNCS(1, 2, sse2); + H264_QPEL_FUNCS(1, 3, sse2); + H264_QPEL_FUNCS(2, 1, sse2); + H264_QPEL_FUNCS(2, 2, sse2); + H264_QPEL_FUNCS(2, 3, sse2); + H264_QPEL_FUNCS(3, 1, sse2); + H264_QPEL_FUNCS(3, 2, sse2); + H264_QPEL_FUNCS(3, 3, sse2); + } + + if (bit_depth == 10) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); + H264_QPEL_FUNCS_10(1, 0, sse2_cache64); + H264_QPEL_FUNCS_10(2, 0, sse2_cache64); + H264_QPEL_FUNCS_10(3, 0, sse2_cache64); + } +#endif + +#if HAVE_SSSE3_EXTERNAL + if (!high_bit_depth) { + H264_QPEL_FUNCS(1, 0, ssse3); + H264_QPEL_FUNCS(1, 1, ssse3); + H264_QPEL_FUNCS(1, 2, ssse3); + H264_QPEL_FUNCS(1, 3, ssse3); + H264_QPEL_FUNCS(2, 0, ssse3); + H264_QPEL_FUNCS(2, 1, ssse3); + H264_QPEL_FUNCS(2, 2, ssse3); + H264_QPEL_FUNCS(2, 3, ssse3); + H264_QPEL_FUNCS(3, 0, ssse3); + H264_QPEL_FUNCS(3, 1, ssse3); + H264_QPEL_FUNCS(3, 2, ssse3); + H264_QPEL_FUNCS(3, 3, ssse3); + } + + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); + } +#endif + +#if HAVE_AVX_EXTERNAL + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, sse2); + H264_QPEL_FUNCS_10(2, 0, sse2); + H264_QPEL_FUNCS_10(3, 0, sse2); + } +#endif +} |