summaryrefslogtreecommitdiff
path: root/libswscale/x86/output.asm
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2012-02-08 02:59:09 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2012-02-08 05:53:35 +0100
commit: 18d0a16fc9d189b1d5593f9a42bb2316e9a66ca9 (patch)
tree: aad3d9b1a07b9efebd7435bb27dde147cfa67913 /libswscale/x86/output.asm
parent: 950930b461cef025152de406f816a3b2efffb540 (diff)
parent: ef1c785f11c168384e42d147648c8fdf5317739b (diff)
Merge remote-tracking branch 'qatar/master'

* qatar/master:
  swscale: make yuv2yuv1 use named registers.
  h264: mark h264_idct_add8_10 with number of XMM registers.
  swscale: fix V plane memory location in bilinear/unscaled RGB/YUYV case.
  vp8: always update next_framep[] before returning from decode_frame().
  avconv: estimate next_dts from framerate if it is set.
  avconv: better next_dts usage.
  avconv: rename InputStream.pts to last_dts.
  avconv: reduce overloading for InputStream.pts.
  avconv: rename InputStream.next_pts to next_dts.
  avconv: rework -t handling for encoding.
  avconv: set encoder timebase for subtitles.
  pva-demux test: add -vn
  swscale: K&R formatting cosmetics for SPARC code
  apedec: allow the user to set the maximum number of output samples per call
  apedec: do not unnecessarily zero output samples for mono frames
  apedec: allocate a single flat buffer for decoded samples
  apedec: use sizeof(field) instead of sizeof(type)
  swscale: split C output functions into separate file.
  swscale: Split C input functions into separate file.
  bytestream: Add bytestream2 writing API.

The avconv changes are due to massive regressions and bugs not merged yet.

Conflicts:
  ffmpeg.c
  libavcodec/vp8.c
  libswscale/swscale.c
  libswscale/x86/swscale_template.c
  tests/fate/demux.mak
  tests/ref/lavf/asf
  tests/ref/lavf/avi
  tests/ref/lavf/mkv
  tests/ref/lavf/mpg
  tests/ref/lavf/nut
  tests/ref/lavf/ogg
  tests/ref/lavf/rm
  tests/ref/lavf/ts
  tests/ref/seek/lavf_avi
  tests/ref/seek/lavf_mkv
  tests/ref/seek/lavf_rm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/x86/output.asm')
-rw-r--r-- libswscale/x86/output.asm 50
1 file changed, 25 insertions, 25 deletions
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 70a2c16bcf..4b2f5c89eb 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -275,17 +275,17 @@ yuv2planeX_fn 10, 7, 5
%macro yuv2plane1_mainloop 2
.loop_%2:
%if %1 == 8
- paddsw m0, m2, [r0+r2*2+mmsize*0]
- paddsw m1, m3, [r0+r2*2+mmsize*1]
+ paddsw m0, m2, [srcq+dstwq*2+mmsize*0]
+ paddsw m1, m3, [srcq+dstwq*2+mmsize*1]
psraw m0, 7
psraw m1, 7
packuswb m0, m1
mov%2 [r1+r2], m0
%elif %1 == 16
- paddd m0, m4, [r0+r2*4+mmsize*0]
- paddd m1, m4, [r0+r2*4+mmsize*1]
- paddd m2, m4, [r0+r2*4+mmsize*2]
- paddd m3, m4, [r0+r2*4+mmsize*3]
+ paddd m0, m4, [srcq+dstwq*4+mmsize*0]
+ paddd m1, m4, [srcq+dstwq*4+mmsize*1]
+ paddd m2, m4, [srcq+dstwq*4+mmsize*2]
+ paddd m3, m4, [srcq+dstwq*4+mmsize*3]
psrad m0, 3
psrad m1, 3
psrad m2, 3
@@ -299,46 +299,46 @@ yuv2planeX_fn 10, 7, 5
paddw m0, m5
paddw m2, m5
%endif ; mmx/sse2/sse4/avx
- mov%2 [r1+r2*2], m0
- mov%2 [r1+r2*2+mmsize], m2
-%else
- paddsw m0, m2, [r0+r2*2+mmsize*0]
- paddsw m1, m2, [r0+r2*2+mmsize*1]
+ mov%2 [dstq+dstwq*2+mmsize*0], m0
+ mov%2 [dstq+dstwq*2+mmsize*1], m2
+%else ; %1 == 9/10
+ paddsw m0, m2, [srcq+dstwq*2+mmsize*0]
+ paddsw m1, m2, [srcq+dstwq*2+mmsize*1]
psraw m0, 15 - %1
psraw m1, 15 - %1
pmaxsw m0, m4
pmaxsw m1, m4
pminsw m0, m3
pminsw m1, m3
- mov%2 [r1+r2*2], m0
- mov%2 [r1+r2*2+mmsize], m1
+ mov%2 [dstq+dstwq*2+mmsize*0], m0
+ mov%2 [dstq+dstwq*2+mmsize*1], m1
%endif
- add r2, mmsize
+ add dstwq, mmsize
jl .loop_%2
%endmacro
%macro yuv2plane1_fn 3
-cglobal yuv2plane1_%1, %3, %3, %2
- add r2, mmsize - 1
- and r2, ~(mmsize - 1)
+cglobal yuv2plane1_%1, %3, %3, %2, src, dst, dstw, dither, offset
+ add dstwq, mmsize - 1
+ and dstwq, ~(mmsize - 1)
%if %1 == 8
- add r1, r2
+ add dstq, dstwq
%else ; %1 != 8
- lea r1, [r1+r2*2]
+ lea dstq, [dstq+dstwq*2]
%endif ; %1 == 8
%if %1 == 16
- lea r0, [r0+r2*4]
+ lea srcq, [srcq+dstwq*4]
%else ; %1 != 16
- lea r0, [r0+r2*2]
+ lea srcq, [srcq+dstwq*2]
%endif ; %1 == 16
- neg r2
+ neg dstwq
%if %1 == 8
pxor m4, m4 ; zero
; create registers holding dither
- movq m3, [r3] ; dither
- test r4d, r4d
+ movq m3, [ditherq] ; dither
+ test offsetd, offsetd
jz .no_rot
%if mmsize == 16
punpcklqdq m3, m3
@@ -374,7 +374,7 @@ cglobal yuv2plane1_%1, %3, %3, %2
%if mmsize == 8
yuv2plane1_mainloop %1, a
%else ; mmsize == 16
- test r1, 15
+ test dstq, 15
jnz .unaligned
yuv2plane1_mainloop %1, a
REP_RET