summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorJames Almer <jamrial@gmail.com>2017-02-02 17:51:21 -0300
committerJames Almer <jamrial@gmail.com>2017-02-02 17:51:21 -0300
commitc8467abbadab424757ea23f71a1036abfb7f14b4 (patch)
tree41a4db9cdb0453253f65df734df858cf0e08ca7a /libavcodec/x86
parentab5c4d006da0b3e7aea6fddcebdf5d3d9ad7fd30 (diff)
x86/rv34dsp: add ff_rv34_idct_dc_add_sse2
Also disable ff_rv34_idct_dc_add_mmx on x86_64 as the presence of sse2 is guaranteed in such builds. Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/rv34dsp.asm19
-rw-r--r--libavcodec/x86/rv34dsp_init.c5
2 files changed, 22 insertions, 2 deletions
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index 7732d65b2a..692b4acfcd 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -64,6 +64,7 @@ rv34_idct dc
rv34_idct dc_noround
; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
+%if ARCH_X86_32
INIT_MMX mmx
cglobal rv34_idct_dc_add, 3, 3
; calculate DC
@@ -97,6 +98,7 @@ cglobal rv34_idct_dc_add, 3, 3
movh [r2], m4
movh [r2+r1], m5
RET
+%endif
; Load coeffs and perform row transform
; Output: coeffs in mm[0467], rounder in mm5
@@ -167,7 +169,7 @@ cglobal rv34_idct_add, 3,3,0, d, s, b
ret
; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc);
-INIT_XMM sse4
+%macro RV34_IDCT_DC_ADD 0
cglobal rv34_idct_dc_add, 3, 3, 6
; load data
IDCT_DC_ROUND r2
@@ -190,7 +192,22 @@ cglobal rv34_idct_dc_add, 3, 3, 6
paddw m4, m0
packuswb m2, m4
movd [r0], m2
+%if cpuflag(sse4)
pextrd [r0+r1], m2, 1
pextrd [r2], m2, 2
pextrd [r2+r1], m2, 3
+%else
+ psrldq m2, 4
+ movd [r0+r1], m2
+ psrldq m2, 4
+ movd [r2], m2
+ psrldq m2, 4
+ movd [r2+r1], m2
+%endif
RET
+%endmacro
+
+INIT_XMM sse2
+RV34_IDCT_DC_ADD
+INIT_XMM sse4
+RV34_IDCT_DC_ADD
diff --git a/libavcodec/x86/rv34dsp_init.c b/libavcodec/x86/rv34dsp_init.c
index d417dac45b..7310122458 100644
--- a/libavcodec/x86/rv34dsp_init.c
+++ b/libavcodec/x86/rv34dsp_init.c
@@ -27,6 +27,7 @@
void ff_rv34_idct_dc_mmxext(int16_t *block);
void ff_rv34_idct_dc_noround_mmxext(int16_t *block);
void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
+void ff_rv34_idct_dc_add_sse2(uint8_t *dst, ptrdiff_t stride, int dc);
void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
@@ -34,12 +35,14 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c)
{
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_MMX(cpu_flags))
+ if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags))
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
c->rv34_idct_add = ff_rv34_idct_add_mmxext;
}
+ if (EXTERNAL_SSE2(cpu_flags))
+ c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse2;
if (EXTERNAL_SSE4(cpu_flags))
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
}