summaryrefslogtreecommitdiff
path: root/libavcodec/x86/dsputil_mmx.c
diff options
context:
space:
mode:
authorDaniel Kang <daniel.d.kang@gmail.com>2011-07-02 22:18:39 -0400
committerRonald S. Bultje <rsbultje@gmail.com>2011-07-03 07:43:38 -0700
commit9bfa5363da21a5ba7bed174a82c86f2a089fc917 (patch)
tree99da032075f99024c444431c0bf1d90c92f94606 /libavcodec/x86/dsputil_mmx.c
parent10dde477c77e0ac0fecda49fdb1dc71329aa7513 (diff)
H.264: Add x86 assembly for 10-bit H.264 qpel functions.
Mainly ported from 8-bit H.264 qpel. Some code ported from x264. LGPL ok by author. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/dsputil_mmx.c')
-rw-r--r--libavcodec/x86/dsputil_mmx.c136
1 files changed, 86 insertions, 50 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 03c094533f..d6eed82dd8 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2627,44 +2627,56 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
}
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
- c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
-
- SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
+ c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU
+
+ SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, );
if (!high_bit_depth) {
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
}
+#if HAVE_YASM
+ else if (bit_depth == 10) {
+#if !ARCH_X86_64
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_);
+#endif
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
+ }
+#endif
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
#if HAVE_YASM
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
@@ -2725,26 +2737,26 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
}
- SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, );
if (!high_bit_depth) {
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, );
}
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
#if HAVE_YASM
if (!high_bit_depth) {
@@ -2788,7 +2800,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 3, sse2);
}
#if HAVE_YASM
+#define H264_QPEL_FUNCS_10(x, y, CPU)\
+ c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU;\
+ c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU;\
+ c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU;\
+ c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU;
if (bit_depth == 10) {
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_);
+ H264_QPEL_FUNCS_10(1, 0, sse2_cache64)
+ H264_QPEL_FUNCS_10(2, 0, sse2_cache64)
+ H264_QPEL_FUNCS_10(3, 0, sse2_cache64)
+
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2;
}
@@ -2810,6 +2835,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
}
+ else if (bit_depth == 10) {
+ H264_QPEL_FUNCS_10(1, 0, ssse3_cache64)
+ H264_QPEL_FUNCS_10(2, 0, ssse3_cache64)
+ H264_QPEL_FUNCS_10(3, 0, ssse3_cache64)
+ }
c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
#if HAVE_YASM
if (!high_bit_depth) {
@@ -2906,6 +2936,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_AVX && HAVE_YASM
if (mm_flags & AV_CPU_FLAG_AVX) {
if (bit_depth == 10) {
+ //AVX implies !cache64.
+ //TODO: Port cache(32|64) detection from x264.
+ H264_QPEL_FUNCS_10(1, 0, sse2)
+ H264_QPEL_FUNCS_10(2, 0, sse2)
+ H264_QPEL_FUNCS_10(3, 0, sse2)
+
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
}