summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/dsputil_mmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/dsputil_mmx.c')
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 136
1 files changed, 86 insertions, 50 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 80bb6cd26c..13530418ff 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2530,44 +2530,56 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
}
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
- c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
-
- SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) /* Fill c->PFX_pixels_tab[IDX][0..15] with PREFIX##PFX##SIZE##_mcXY_##CPU, where mcXY goes to slot X + 4*Y; PREFIX may be passed empty. No trailing ';' in the expansion, so call sites supply it. */ \
+ c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU
+
+ SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, );
if (!high_bit_depth) {
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
}
+#if HAVE_YASM
+ else if (bit_depth == 10) {
+#if !ARCH_X86_64
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_);
+#endif
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
+ }
+#endif
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
#if HAVE_YASM
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
@@ -2627,26 +2639,26 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
}
- SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, );
if (!high_bit_depth) {
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, );
}
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
#if HAVE_YASM
if (!high_bit_depth) {
@@ -2690,7 +2702,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 3, sse2);
}
#if HAVE_YASM
+#define H264_QPEL_FUNCS_10(x, y, CPU) /* Point slot x+y*4 of the put/avg h264 qpel tables [0] (qpel16) and [1] (qpel8) at the ff_*_mc<x><y>_10_<CPU> functions. The expansion already ends in ';', so it is invoked without one. */\
+ c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU;\
+ c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU;\
+ c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU;\
+ c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU;
if (bit_depth == 10) {
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
+ H264_QPEL_FUNCS_10(1, 0, sse2_cache64)
+ H264_QPEL_FUNCS_10(2, 0, sse2_cache64)
+ H264_QPEL_FUNCS_10(3, 0, sse2_cache64)
+
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2;
}
@@ -2712,6 +2737,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
}
+ else if (bit_depth == 10) {
+ H264_QPEL_FUNCS_10(1, 0, ssse3_cache64)
+ H264_QPEL_FUNCS_10(2, 0, ssse3_cache64)
+ H264_QPEL_FUNCS_10(3, 0, ssse3_cache64)
+ }
#if HAVE_YASM
if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
@@ -2807,6 +2837,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_AVX && HAVE_YASM
if (mm_flags & AV_CPU_FLAG_AVX) {
if (bit_depth == 10) {
+ //AVX implies !cache64.
+ //TODO: Port cache(32|64) detection from x264.
+ H264_QPEL_FUNCS_10(1, 0, sse2)
+ H264_QPEL_FUNCS_10(2, 0, sse2)
+ H264_QPEL_FUNCS_10(3, 0, sse2)
+
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
}