From bef966e341b1f39a9769f0c3bb75b3bc2a64f85c Mon Sep 17 00:00:00 2001
From: Måns Rullgård
Date: Wed, 7 Oct 2009 21:35:19 +0000
Subject: ARM: NEON avg_pixels8 and avg_h264_qpel8_mc00

Originally committed as revision 20190 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/arm/dsputil_init_neon.c |  6 ++++++
 libavcodec/arm/dsputil_neon.S      | 19 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'libavcodec')

diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index a1d4980171..ef706b2873 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -49,6 +49,7 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
 void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
 
 void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int);
+void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, int, int);
 
 void ff_add_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
 void ff_put_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
@@ -90,6 +91,8 @@ void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
 
 void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
 
+void ff_avg_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
+
 void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
 void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
 
@@ -230,6 +233,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
     c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
 
     c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
+    c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
 
     c->add_pixels_clamped = ff_add_pixels_clamped_neon;
     c->put_pixels_clamped = ff_put_pixels_clamped_neon;
@@ -278,6 +282,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 
     c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
 
+    c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon;
+
     c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
     c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
     c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 56e7cd3e96..ef5e8c7152 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -139,7 +139,7 @@
         bx lr
         .endm
 
-        .macro pixels8
+        .macro pixels8 avg=0
 1:      vld1.64 {d0}, [r1], r2
         vld1.64 {d1}, [r1], r2
         vld1.64 {d2}, [r1], r2
@@ -148,6 +148,17 @@
         pld [r1]
         pld [r1, r2]
         pld [r1, r2, lsl #1]
+.if \avg
+        vld1.64 {d4}, [r0,:64], r2
+        vrhadd.u8 d0, d0, d4
+        vld1.64 {d5}, [r0,:64], r2
+        vrhadd.u8 d1, d1, d5
+        vld1.64 {d6}, [r0,:64], r2
+        vrhadd.u8 d2, d2, d6
+        vld1.64 {d7}, [r0,:64], r2
+        vrhadd.u8 d3, d3, d7
+        sub r0, r0, r2, lsl #2
+.endif
         subs r3, r3, #4
         vst1.64 {d0}, [r0,:64], r2
         vst1.64 {d1}, [r0,:64], r2
@@ -261,6 +272,12 @@ function ff_put_h264_qpel8_mc00_neon, export=1
         pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
         pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
 
+function ff_avg_h264_qpel8_mc00_neon, export=1
+        mov r3, #8
+        .endfunc
+
+        pixfunc avg_ pixels8,, 1
+
 function ff_put_pixels_clamped_neon, export=1
         vld1.64 {d16-d19}, [r0,:128]!
         vqmovun.s16 d0, q8
-- 
cgit v1.2.3