summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2012-03-08 02:28:40 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2012-03-08 02:51:45 +0100
commit: bf807a5e874442aa3fe1b475459cdd509e34bff4 (patch)
tree: f8067bfb5e99b8b8e2716a7ea8519a4aaa8ac60f /libavcodec/x86
parent: 4cda8aa1c5bc58f8a7f53a21a19b03e7379bbcdc (diff)
parent: 6eda85e15b38863a627fd0602098aa3250174698 (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master: (29 commits)
  sbrdsp.asm: convert all instructions to float/SSE ones.
  dv: cosmetics.
  dv: check buffer size before reading profile.
  Revert "AAC SBR: group some writes."
  udp: Print an error message if bind fails
  cook: extend channel uncoupling tables so the full bit range is covered.
  roqvideo: cosmetics.
  roqvideo: convert to bytestream2 API.
  dca: don't use av_clip_uintp2().
  wmall: fix build with -DDEBUG enabled.
  smc: port to bytestream2 API.
  AAC SBR: group some writes.
  dsputil: remove shift parameter from scalarproduct_int16
  SBR DSP: unroll sum_square
  rv34: remove dead code in intra availability check
  rv34: clean a bit availability checks.
  v4l2: update documentation
  tgq: convert to bytestream2 API.
  parser: remove forward declaration of MpegEncContext
  dca: prevent accessing static arrays with invalid indexes.
  ...

Conflicts:
  doc/indevs.texi
  libavcodec/Makefile
  libavcodec/dca.c
  libavcodec/dvdata.c
  libavcodec/eatgq.c
  libavcodec/mmvideo.c
  libavcodec/roqvideodec.c
  libavcodec/smc.c
  libswscale/output.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/dsputil_yasm.asm | 7
-rw-r--r-- libavcodec/x86/h264_qpel_mmx.c | 10
-rw-r--r-- libavcodec/x86/sbrdsp.asm | 16
3 files changed, 10 insertions, 23 deletions
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 8e8c10c189..7ddc770a2e 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -35,13 +35,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
SECTION_TEXT
%macro SCALARPRODUCT 1
-; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift)
-cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
+; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
shl orderq, 1
add v1q, orderq
add v2q, orderq
neg orderq
- movd m3, shiftm
pxor m2, m2
.loop:
movu m0, [v1q + orderq]
@@ -55,10 +54,8 @@ cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
%if mmsize == 16
movhlps m0, m2
paddd m2, m0
- psrad m2, m3
pshuflw m0, m2, 0x4e
%else
- psrad m2, m3
pshufw m0, m2, 0x4e
%endif
paddd m2, m0
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index 807d8548d6..6cc3ac823d 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -1161,16 +1161,6 @@ QPEL(put_, 16,XMM, 16)\
QPEL(avg_, 8, XMM, 16)\
QPEL(avg_, 16,XMM, 16)\
-
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgusb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-
#define PAVGB "pavgusb"
QPEL_H264(put_, PUT_OP, 3dnow)
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c3b559bb15..31a1c8b76f 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5
lea r0, [r0 + r3*8]
neg r3
.loop4:
- movq m0, [r2 + 4*r3 + 0]
- movq m1, [r2 + 4*r3 + 8]
- movq m2, [r1 + 0*STEP]
- movq m3, [r1 + 2*STEP]
+ movlps m0, [r2 + 4*r3 + 0]
+ movlps m1, [r2 + 4*r3 + 8]
+ movlps m2, [r1 + 0*STEP]
+ movlps m3, [r1 + 2*STEP]
movhps m2, [r1 + 1*STEP]
movhps m3, [r1 + 3*STEP]
- punpckldq m0, m0
- punpckldq m1, m1
+ unpcklps m0, m0
+ unpcklps m1, m1
mulps m0, m2
mulps m1, m3
movu [r0 + 8*r3 + 0], m0
@@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5
jz .end
.loop1: ; element 0 and 1 can be computed at the same time
movss m0, [r2]
- movq m2, [r1]
- punpckldq m0, m0
+ movlps m2, [r1]
+ unpcklps m0, m0
mulps m2, m0
movlps [r0], m2
add r0, 8