From 4958f35a2ebc307049ff2104ffb944f5f457feb3 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 6 Dec 2013 12:22:40 +0000 Subject: dsputil: Move apply_window_int16 to ac3dsp The (optimized) functions are used nowhere else. --- libavcodec/arm/ac3dsp_init_arm.c | 3 +++ libavcodec/arm/ac3dsp_neon.S | 23 +++++++++++++++++++++++ libavcodec/arm/dsputil_init_neon.c | 5 ----- libavcodec/arm/dsputil_neon.S | 23 ----------------------- 4 files changed, 26 insertions(+), 28 deletions(-) (limited to 'libavcodec/arm') diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c index d7cb95b669..a48353a099 100644 --- a/libavcodec/arm/ac3dsp_init_arm.c +++ b/libavcodec/arm/ac3dsp_init_arm.c @@ -31,6 +31,8 @@ void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len); void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs); +void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, + const int16_t *window, unsigned n); void ff_ac3_bit_alloc_calc_bap_armv6(int16_t *mask, int16_t *psd, int start, int end, @@ -56,5 +58,6 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon; c->float_to_fixed24 = ff_float_to_fixed24_neon; c->extract_exponents = ff_ac3_extract_exponents_neon; + c->apply_window_int16 = ff_apply_window_int16_neon; } } diff --git a/libavcodec/arm/ac3dsp_neon.S b/libavcodec/arm/ac3dsp_neon.S index 82ff8af9fb..f97b1907df 100644 --- a/libavcodec/arm/ac3dsp_neon.S +++ b/libavcodec/arm/ac3dsp_neon.S @@ -108,3 +108,26 @@ function ff_ac3_extract_exponents_neon, export=1 bgt 1b bx lr endfunc + +function ff_apply_window_int16_neon, export=1 + push {r4,lr} + add r4, r1, r3, lsl #1 + add lr, r0, r3, lsl #1 + sub r4, r4, #16 + sub lr, lr, #16 + mov r12, #-16 +1: + vld1.16 {q0}, [r1,:128]! + vld1.16 {q2}, [r2,:128]! + vld1.16 {q1}, [r4,:128], r12 + vrev64.16 q3, q2 + vqrdmulh.s16 q0, q0, q2 + vqrdmulh.s16 d2, d2, d7 + vqrdmulh.s16 d3, d3, d6 + vst1.16 {q0}, [r0,:128]! + vst1.16 {q1}, [lr,:128], r12 + subs r3, r3, #16 + bgt 1b + + pop {r4,pc} +endfunc diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 793fab1cf8..0926c84428 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -45,9 +45,6 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul); -void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, - const int16_t *window, unsigned n); - av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) { const int high_bit_depth = avctx->bits_per_raw_sample > 8; @@ -76,6 +73,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->scalarproduct_int16 = ff_scalarproduct_int16_neon; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon; - - c->apply_window_int16 = ff_apply_window_int16_neon; } diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index 81662c04ad..e30bd10b17 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -169,29 +169,6 @@ NOVFP ldr r2, [sp] bx lr endfunc -function ff_apply_window_int16_neon, export=1 - push {r4,lr} - add r4, r1, r3, lsl #1 - add lr, r0, r3, lsl #1 - sub r4, r4, #16 - sub lr, lr, #16 - mov r12, #-16 -1: - vld1.16 {q0}, [r1,:128]! - vld1.16 {q2}, [r2,:128]! - vld1.16 {q1}, [r4,:128], r12 - vrev64.16 q3, q2 - vqrdmulh.s16 q0, q0, q2 - vqrdmulh.s16 d2, d2, d7 - vqrdmulh.s16 d3, d3, d6 - vst1.16 {q0}, [r0,:128]! - vst1.16 {q1}, [lr,:128], r12 - subs r3, r3, #16 - bgt 1b - - pop {r4,pc} -endfunc - function ff_vector_clip_int32_neon, export=1 vdup.32 q0, r2 vdup.32 q1, r3 -- cgit v1.2.3