summary | refs | log | tree | commit | diff
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2017-01-31 15:08:19 -0300
committer: James Almer <jamrial@gmail.com> 2017-01-31 15:08:19 -0300
commit: ac774cfa571734c49c26e2d3387adccff8957ff8 (patch)
tree: 92011adfe335fe1160bb26b159fd1510d96458f1 /libavcodec/x86
parent: a956164e1eb3418922cae949f02ad4035f013213 (diff)
parent: 4efab89332ea39a77145e8b15562b981d9dbde68 (diff)
Merge commit '4efab89332ea39a77145e8b15562b981d9dbde68'
* commit '4efab89332ea39a77145e8b15562b981d9dbde68':
  x86: Use *_FAST/*_SLOW CPU feature detection macros where appropriate

Merged-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/ac3dsp_init.c 13
-rw-r--r-- libavcodec/x86/h264_qpel.c 11
-rw-r--r-- libavcodec/x86/lpc.c 2
-rw-r--r-- libavcodec/x86/vp8dsp_init.c 4
4 files changed, 17 insertions, 13 deletions
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index 07f0d25681..9fd0aef83e 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -228,16 +228,19 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
c->extract_exponents = ff_ac3_extract_exponents_sse2;
- if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
- c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
- c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
- }
if (bit_exact) {
c->apply_window_int16 = ff_apply_window_int16_sse2;
- } else if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
+ }
+ }
+
+ if (EXTERNAL_SSE2_FAST(cpu_flags)) {
+ c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
+ c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
+ if (!bit_exact) {
c->apply_window_int16 = ff_apply_window_int16_round_sse2;
}
}
+
if (EXTERNAL_SSSE3(cpu_flags)) {
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
if (cpu_flags & AV_CPU_FLAG_ATOM) {
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index 8c4b1e1288..96fa4a706a 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -562,11 +562,6 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
}
if (EXTERNAL_SSE2(cpu_flags)) {
- if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
- // these functions are slower than mmx on AMD, but faster on Intel
- H264_QPEL_FUNCS(0, 0, sse2);
- }
-
if (!high_bit_depth) {
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
@@ -593,6 +588,12 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
}
}
+ if (EXTERNAL_SSE2_FAST(cpu_flags)) {
+ if (!high_bit_depth) {
+ H264_QPEL_FUNCS(0, 0, sse2);
+ }
+ }
+
if (EXTERNAL_SSSE3(cpu_flags)) {
if (!high_bit_depth) {
H264_QPEL_FUNCS(1, 0, ssse3);
diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c
index 3a9493f728..6c72e21bac 100644
--- a/libavcodec/x86/lpc.c
+++ b/libavcodec/x86/lpc.c
@@ -154,7 +154,7 @@ av_cold void ff_lpc_init_x86(LPCContext *c)
#if HAVE_SSE2_INLINE
int cpu_flags = av_get_cpu_flags();
- if (HAVE_SSE2_INLINE && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+ if (INLINE_SSE2(cpu_flags) || INLINE_SSE2_SLOW(cpu_flags)) {
c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
c->lpc_compute_autocorr = lpc_compute_autocorr_sse2;
}
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c
index 897d5a0e77..20c5fac0c0 100644
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@ -346,7 +346,7 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
}
- if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+ if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
VP8_LUMA_MC_FUNC(0, 16, sse2);
VP8_MC_FUNC(1, 8, sse2);
VP8_BILINEAR_MC_FUNC(0, 16, sse2);
@@ -416,7 +416,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
}
- if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+ if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;