diff options
author | Anton Khirnov <anton@khirnov.net> | 2015-07-24 08:24:21 +0200 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2015-12-05 21:08:04 +0100 |
commit | 1f821750f0b8d0c87cbf88a28ad699b92db5ec88 (patch) | |
tree | 78ec782cd9a422df28fa05c2c0a72650f1ec89c1 /libavcodec/hevcdsp_template.c | |
parent | 6788baebb3680d447eabdadf3f5743c8470a4611 (diff) |
hevcdsp: split the qpel functions by width instead of by the subpixel fraction
This should allow for more efficient SIMD.
Keep the C versions as they are now, to allow the compiler to inline the
interpolation coefficients.
Diffstat (limited to 'libavcodec/hevcdsp_template.c')
-rw-r--r-- | libavcodec/hevcdsp_template.c | 82 |
1 files changed, 79 insertions, 3 deletions
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index 390f683295..84503ec2f6 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -775,9 +775,11 @@ static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src, #undef TR_16 #undef TR_32 -static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride, - uint8_t *_src, ptrdiff_t _srcstride, - int width, int height, int16_t* mcbuffer) +static av_always_inline void +FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int width, int height, int mx, int my, + int16_t* mcbuffer) { int x, y; pixel *src = (pixel *)_src; @@ -906,6 +908,80 @@ PUT_HEVC_QPEL_HV(3, 1) PUT_HEVC_QPEL_HV(3, 2) PUT_HEVC_QPEL_HV(3, 3) +#define QPEL(W) \ +static void FUNC(put_hevc_qpel_pixels_ ## W)(int16_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int height, int mx, int my, \ + int16_t *mcbuffer) \ +{ \ + FUNC(put_hevc_qpel_pixels)(dst, dststride, src, srcstride, W, height, \ + mx, my, mcbuffer); \ +} \ + \ +static void FUNC(put_hevc_qpel_h_ ## W)(int16_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int height, int mx, int my, \ + int16_t *mcbuffer) \ +{ \ + if (mx == 1) \ + FUNC(put_hevc_qpel_h1)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else if (mx == 2) \ + FUNC(put_hevc_qpel_h2)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else \ + FUNC(put_hevc_qpel_h3)(dst, dststride, src, srcstride, W, height, mcbuffer); \ +} \ + \ +static void FUNC(put_hevc_qpel_v_ ## W)(int16_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int height, int mx, int my, \ + int16_t *mcbuffer) \ +{ \ + if (my == 1) \ + FUNC(put_hevc_qpel_v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else if (my == 2) \ + FUNC(put_hevc_qpel_v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else \ + FUNC(put_hevc_qpel_v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \ +} \ + \ +static void FUNC(put_hevc_qpel_hv_ ## W)(int16_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int height, int mx, int my, \ + int16_t *mcbuffer) \ +{ \ + if (my == 1) { \ + if (mx == 1) \ + FUNC(put_hevc_qpel_h1v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else if (mx == 2) \ + FUNC(put_hevc_qpel_h2v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else \ + FUNC(put_hevc_qpel_h3v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + } else if (my == 2) { \ + if (mx == 1) \ + FUNC(put_hevc_qpel_h1v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else if (mx == 2) \ + FUNC(put_hevc_qpel_h2v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else \ + FUNC(put_hevc_qpel_h3v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + } else { \ + if (mx == 1) \ + FUNC(put_hevc_qpel_h1v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else if (mx == 2) \ + FUNC(put_hevc_qpel_h2v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + else \ + FUNC(put_hevc_qpel_h3v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \ + } \ +} + +QPEL(64) +QPEL(48) +QPEL(32) +QPEL(24) +QPEL(16) +QPEL(12) +QPEL(8) +QPEL(4) + static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int mx, int my, |