summaryrefslogtreecommitdiff
path: root/libswscale/x86/output.asm
diff options
context:
space:
mode:
author Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2022-06-09 16:57:34 +0200
committer Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2022-06-22 13:36:04 +0200
commit a05f22eaf393177b94432431c145cbc5ba10390a (patch)
tree 31a2ae01b520b2578477ea4b6b2febf1d0e6c1e6 /libswscale/x86/output.asm
parent 2831837182fe26f0a19a4d366f3f0553311f1291 (diff)
swscale/x86/swscale: Remove obsolete and harmful MMX(EXT) functions
x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT, SSE and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2). So given that the only systems that benefit from these functions are truly ancient 32-bit x86s, they are removed. Moreover, some of the removed code was buggy/not bit-exact and led to failures involving the f32le and f32be versions of gray, gbrp and gbrap on x86-32 when SSE2 is not available. See e.g. https://fate.ffmpeg.org/report.cgi?time=20220609221253&slot=x86_32-debian-kfreebsd-gcc-4.4-cpuflags-mmx Notice that yuv2yuvX_mmx is not removed, because it is used by SSE3 and AVX2 as fallback in case of unaligned data and also for tail processing. I don't know why yuv2yuvX_mmxext isn't being used for this; an earlier version [1] of 554c2bc7086f49ef5a6a989ad6bc4bc11807eb6f used it, but the version that was eventually applied does not. [1]: https://ffmpeg.org/pipermail/ffmpeg-devel/2020-November/272124.html Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Diffstat (limited to 'libswscale/x86/output.asm')
-rw-r--r--libswscale/x86/output.asm30
1 files changed, 3 insertions, 27 deletions
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 1e498fddf6..84e94baaf6 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -312,11 +312,9 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
%endif ; %1 == 8/9/10/16
%endmacro
-%if ARCH_X86_32
+%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
INIT_MMX mmxext
yuv2planeX_fn 8, 0, 7
-yuv2planeX_fn 9, 0, 5
-yuv2planeX_fn 10, 0, 5
%endif
INIT_XMM sse2
@@ -407,19 +405,11 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
movq m3, [ditherq] ; dither
test offsetd, offsetd
jz .no_rot
-%if mmsize == 16
punpcklqdq m3, m3
-%endif ; mmsize == 16
PALIGNR m3, m3, 3, m2
.no_rot:
-%if mmsize == 8
- mova m2, m3
- punpckhbw m3, m4 ; byte->word
- punpcklbw m2, m4 ; byte->word
-%else
punpcklbw m3, m4
mova m2, m3
-%endif
%elif %1 == 9
pxor m4, m4
mova m3, [pw_512]
@@ -431,36 +421,22 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
%else ; %1 == 16
%if cpuflag(sse4) ; sse4/avx
mova m4, [pd_4]
-%else ; mmx/sse2
+%else ; sse2
mova m4, [pd_4min0x40000]
mova m5, [minshort]
-%endif ; mmx/sse2/sse4/avx
+%endif ; sse2/sse4/avx
%endif ; %1 == ..
; actual pixel scaling
-%if mmsize == 8
- yuv2plane1_mainloop %1, a
-%else ; mmsize == 16
test dstq, 15
jnz .unaligned
yuv2plane1_mainloop %1, a
REP_RET
.unaligned:
yuv2plane1_mainloop %1, u
-%endif ; mmsize == 8/16
REP_RET
%endmacro
-%if ARCH_X86_32
-INIT_MMX mmx
-yuv2plane1_fn 8, 0, 5
-yuv2plane1_fn 16, 0, 3
-
-INIT_MMX mmxext
-yuv2plane1_fn 9, 0, 3
-yuv2plane1_fn 10, 0, 3
-%endif
-
INIT_XMM sse2
yuv2plane1_fn 8, 5, 5
yuv2plane1_fn 9, 5, 3