Merge remote-tracking branch 'qatar/master'

* qatar/master: (29 commits)
  sbrdsp.asm: convert all instructions to float/SSE ones.
  dv: cosmetics.
  dv: check buffer size before reading profile.
  Revert "AAC SBR: group some writes."
  udp: Print an error message if bind fails
  cook: extend channel uncoupling tables so the full bit range is covered.
  roqvideo: cosmetics.
  roqvideo: convert to bytestream2 API.
  dca: don't use av_clip_uintp2().
  wmall: fix build with -DDEBUG enabled.
  smc: port to bytestream2 API.
  AAC SBR: group some writes.
  dsputil: remove shift parameter from scalarproduct_int16
  SBR DSP: unroll sum_square
  rv34: remove dead code in intra availability check
  rv34: clean a bit availability checks.
  v4l2: update documentation
  tgq: convert to bytestream2 API.
  parser: remove forward declaration of MpegEncContext
  dca: prevent accessing static arrays with invalid indexes.
  ...

Conflicts:
	doc/indevs.texi
	libavcodec/Makefile
	libavcodec/dca.c
	libavcodec/dvdata.c
	libavcodec/eatgq.c
	libavcodec/mmvideo.c
	libavcodec/roqvideodec.c
	libavcodec/smc.c
	libswscale/output.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2012-03-08 02:28:40 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2012-03-08 02:51:45 +0100
commit: bf807a5e874442aa3fe1b475459cdd509e34bff4 (patch)
tree: f8067bfb5e99b8b8e2716a7ea8519a4aaa8ac60f /libavcodec/x86
parent: 4cda8aa1c5bc58f8a7f53a21a19b03e7379bbcdc (diff)
parent: 6eda85e15b38863a627fd0602098aa3250174698 (diff)
3 files changed, 10 insertions, 23 deletions
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 8e8c10c189..7ddc770a2e 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -35,13 +35,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 SECTION_TEXT
 
 %macro SCALARPRODUCT 1
-; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift)
-cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
+; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
     shl orderq, 1
     add v1q, orderq
     add v2q, orderq
     neg orderq
-    movd    m3, shiftm
     pxor    m2, m2
 .loop:
     movu    m0, [v1q + orderq]
@@ -55,10 +54,8 @@ cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
 %if mmsize == 16
     movhlps m0, m2
     paddd   m2, m0
-    psrad   m2, m3
     pshuflw m0, m2, 0x4e
 %else
-    psrad   m2, m3
     pshufw  m0, m2, 0x4e
 %endif
     paddd   m2, m0
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index 807d8548d6..6cc3ac823d 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -1161,16 +1161,6 @@ QPEL(put_, 16,XMM, 16)\
 QPEL(avg_, 8, XMM, 16)\
 QPEL(avg_, 16,XMM, 16)\
 
-
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp "   \n\t"\
-"pavgusb " #temp ", " #a "        \n\t"\
-"mov" #size " " #a ", " #b "      \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp "   \n\t"\
-"pavgb " #temp ", " #a "          \n\t"\
-"mov" #size " " #a ", " #b "      \n\t"
-
 #define PAVGB "pavgusb"
 QPEL_H264(put_,       PUT_OP, 3dnow)
 QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c3b559bb15..31a1c8b76f 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5
     lea         r0, [r0 + r3*8]
     neg         r3
 .loop4:
-    movq        m0, [r2 + 4*r3 + 0]
-    movq        m1, [r2 + 4*r3 + 8]
-    movq        m2, [r1 + 0*STEP]
-    movq        m3, [r1 + 2*STEP]
+    movlps      m0, [r2 + 4*r3 + 0]
+    movlps      m1, [r2 + 4*r3 + 8]
+    movlps      m2, [r1 + 0*STEP]
+    movlps      m3, [r1 + 2*STEP]
     movhps      m2, [r1 + 1*STEP]
     movhps      m3, [r1 + 3*STEP]
-    punpckldq   m0, m0
-    punpckldq   m1, m1
+    unpcklps    m0, m0
+    unpcklps    m1, m1
     mulps       m0, m2
     mulps       m1, m3
     movu        [r0 + 8*r3 +  0], m0
@@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5
     jz          .end
 .loop1: ; element 0 and 1 can be computed at the same time
     movss       m0, [r2]
-    movq        m2, [r1]
-    punpckldq   m0, m0
+    movlps      m2, [r1]
+    unpcklps    m0, m0
     mulps       m2, m0
     movlps    [r0], m2
     add         r0, 8
author	Michael Niedermayer <michaelni@gmx.at>	2012-03-08 02:28:40 +0100
committer	Michael Niedermayer <michaelni@gmx.at>	2012-03-08 02:51:45 +0100
commit	bf807a5e874442aa3fe1b475459cdd509e34bff4 (patch)
tree	f8067bfb5e99b8b8e2716a7ea8519a4aaa8ac60f /libavcodec/x86
parent	4cda8aa1c5bc58f8a7f53a21a19b03e7379bbcdc (diff)
parent	6eda85e15b38863a627fd0602098aa3250174698 (diff)