From 818bfe7f0a3ff243deb63c4b146de2563f38ffd4 Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Fri, 24 Jul 2015 08:24:21 +0200
Subject: hevcdsp: split the epel functions by width

This should allow for more efficient SIMD.
---
 libavcodec/hevcdsp_template.c | 75 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 16 deletions(-)

(limited to 'libavcodec/hevcdsp_template.c')

diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index 84503ec2f6..d832904dcb 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -982,10 +982,10 @@ QPEL(12)
 QPEL(8)
 QPEL(4)
 
-static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
-                                       uint8_t *_src, ptrdiff_t _srcstride,
-                                       int width, int height, int mx, int my,
-                                       int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
+                                              uint8_t *_src, ptrdiff_t _srcstride,
+                                              int width, int height, int mx, int my,
+                                              int16_t* mcbuffer)
 {
     int x, y;
     pixel *src = (pixel *)_src;
@@ -1005,10 +1005,10 @@ static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
      filter_2 * src[x + stride] + \
      filter_3 * src[x + 2 * stride])
 
-static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
-                                  uint8_t *_src, ptrdiff_t _srcstride,
-                                  int width, int height, int mx, int my,
-                                  int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
+                                         uint8_t *_src, ptrdiff_t _srcstride,
+                                         int width, int height, int mx, int my,
+                                         int16_t* mcbuffer)
 {
     int x, y;
     pixel *src = (pixel *)_src;
@@ -1026,10 +1026,10 @@ static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
     }
 }
 
-static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
-                                  uint8_t *_src, ptrdiff_t _srcstride,
-                                  int width, int height, int mx, int my,
-                                  int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
+                                         uint8_t *_src, ptrdiff_t _srcstride,
+                                         int width, int height, int mx, int my,
+                                         int16_t* mcbuffer)
 {
     int x, y;
     pixel *src = (pixel *)_src;
@@ -1048,10 +1048,10 @@ static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
     }
 }
 
-static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
-                                   uint8_t *_src, ptrdiff_t _srcstride,
-                                   int width, int height, int mx, int my,
-                                   int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
+                                          uint8_t *_src, ptrdiff_t _srcstride,
+                                          int width, int height, int mx, int my,
+                                          int16_t* mcbuffer)
 {
     int x, y;
     pixel *src = (pixel *)_src;
@@ -1087,6 +1087,49 @@ static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
     }
 }
 
+#define EPEL(W) \
+static void FUNC(put_hevc_epel_pixels_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+                                             uint8_t *src, ptrdiff_t srcstride, \
+                                             int height, int mx, int my, \
+                                             int16_t *mcbuffer) \
+{ \
+    FUNC(put_hevc_epel_pixels)(dst, dststride, src, srcstride, \
+                               W, height, mx, my, mcbuffer); \
+} \
+static void FUNC(put_hevc_epel_h_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+                                        uint8_t *src, ptrdiff_t srcstride, \
+                                        int height, int mx, int my, \
+                                        int16_t *mcbuffer) \
+{ \
+    FUNC(put_hevc_epel_h)(dst, dststride, src, srcstride, \
+                          W, height, mx, my, mcbuffer); \
+} \
+static void FUNC(put_hevc_epel_v_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+                                        uint8_t *src, ptrdiff_t srcstride, \
+                                        int height, int mx, int my, \
+                                        int16_t *mcbuffer) \
+{ \
+    FUNC(put_hevc_epel_v)(dst, dststride, src, srcstride, \
+                          W, height, mx, my, mcbuffer); \
+} \
+static void FUNC(put_hevc_epel_hv_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+                                         uint8_t *src, ptrdiff_t srcstride, \
+                                         int height, int mx, int my, \
+                                         int16_t *mcbuffer) \
+{ \
+    FUNC(put_hevc_epel_hv)(dst, dststride, src, srcstride, \
+                           W, height, mx, my, mcbuffer); \
+}
+
+EPEL(32)
+EPEL(24)
+EPEL(16)
+EPEL(12)
+EPEL(8)
+EPEL(6)
+EPEL(4)
+EPEL(2)
+
 static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
                                       int16_t *src, ptrdiff_t srcstride,
                                       int width, int height)
-- 
cgit v1.2.3