summary | refs | log | tree | commit | diff
path: root/libavcodec/i386/dsputil_mmx.c
diff options
context:
space:
mode:
author: Loren Merritt <lorenm@u.washington.edu> 2008-02-06 12:32:31 +0000
committer: Loren Merritt <lorenm@u.washington.edu> 2008-02-06 12:32:31 +0000
commit: 1d67b037f7fc344f2e0b4dac6ac0ca3dd5438e6c (patch)
tree: 796bdb0ab7fa2efb3a8b385d70f7128b3291a172 /libavcodec/i386/dsputil_mmx.c
parent: 20d565be6d52c38495eeaa8904c02315421629b3 (diff)
sse2 h264 motion compensation. not new code, just separate out the cases that didn't need ssse3.
Originally committed as revision 11877 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputil_mmx.c')
-rw-r--r-- libavcodec/i386/dsputil_mmx.c 42
1 file changed, 35 insertions, 7 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 55343181de..54246fc8ba 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -3523,20 +3523,48 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
}
-/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
+
+#define H264_QPEL_FUNCS(x, y, CPU)\
+ c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
+ c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
+ c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
+ c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
if((mm_flags & MM_SSE2) && !(mm_flags & MM_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
+/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
- }
*/
-
+ H264_QPEL_FUNCS(0, 0, sse2);
+ }
+ if(mm_flags & MM_SSE2){
+ H264_QPEL_FUNCS(0, 1, sse2);
+ H264_QPEL_FUNCS(0, 2, sse2);
+ H264_QPEL_FUNCS(0, 3, sse2);
+ H264_QPEL_FUNCS(1, 1, sse2);
+ H264_QPEL_FUNCS(1, 2, sse2);
+ H264_QPEL_FUNCS(1, 3, sse2);
+ H264_QPEL_FUNCS(2, 1, sse2);
+ H264_QPEL_FUNCS(2, 2, sse2);
+ H264_QPEL_FUNCS(2, 3, sse2);
+ H264_QPEL_FUNCS(3, 1, sse2);
+ H264_QPEL_FUNCS(3, 2, sse2);
+ H264_QPEL_FUNCS(3, 3, sse2);
+ }
#ifdef HAVE_SSSE3
if(mm_flags & MM_SSSE3){
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, ssse3);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, ssse3);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, ssse3);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, ssse3);
+ H264_QPEL_FUNCS(1, 0, ssse3);
+ H264_QPEL_FUNCS(1, 1, ssse3);
+ H264_QPEL_FUNCS(1, 2, ssse3);
+ H264_QPEL_FUNCS(1, 3, ssse3);
+ H264_QPEL_FUNCS(2, 0, ssse3);
+ H264_QPEL_FUNCS(2, 1, ssse3);
+ H264_QPEL_FUNCS(2, 2, ssse3);
+ H264_QPEL_FUNCS(2, 3, ssse3);
+ H264_QPEL_FUNCS(3, 0, ssse3);
+ H264_QPEL_FUNCS(3, 1, ssse3);
+ H264_QPEL_FUNCS(3, 2, ssse3);
+ H264_QPEL_FUNCS(3, 3, ssse3);
}
#endif