summary | refs | log | tree | commit | diff
path: root/libavcodec/hevcdsp_template.c
diff options
context:
space:
mode:
author: Anton Khirnov <anton@khirnov.net>, 2015-07-24 08:24:21 +0200
committer: Anton Khirnov <anton@khirnov.net>, 2015-12-05 21:08:04 +0100
commit: 1f821750f0b8d0c87cbf88a28ad699b92db5ec88 (patch)
tree: 78ec782cd9a422df28fa05c2c0a72650f1ec89c1 /libavcodec/hevcdsp_template.c
parent: 6788baebb3680d447eabdadf3f5743c8470a4611 (diff)
hevcdsp: split the qpel functions by width instead of by the subpixel fraction
This should allow for more efficient SIMD. Keep the C versions as they are now, to allow the compiler to inline the interpolation coefficients.
Diffstat (limited to 'libavcodec/hevcdsp_template.c')
-rw-r--r-- libavcodec/hevcdsp_template.c | 82
1 files changed, 79 insertions, 3 deletions
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index 390f683295..84503ec2f6 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -775,9 +775,11 @@ static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src,
#undef TR_16
#undef TR_32
-static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
- uint8_t *_src, ptrdiff_t _srcstride,
- int width, int height, int16_t* mcbuffer)
+static av_always_inline void
+FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
+ uint8_t *_src, ptrdiff_t _srcstride,
+ int width, int height, int mx, int my,
+ int16_t* mcbuffer)
{
int x, y;
pixel *src = (pixel *)_src;
@@ -906,6 +908,80 @@ PUT_HEVC_QPEL_HV(3, 1)
PUT_HEVC_QPEL_HV(3, 2)
PUT_HEVC_QPEL_HV(3, 3)
+#define QPEL(W) \
+static void FUNC(put_hevc_qpel_pixels_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+ uint8_t *src, ptrdiff_t srcstride, \
+ int height, int mx, int my, \
+ int16_t *mcbuffer) \
+{ \
+ FUNC(put_hevc_qpel_pixels)(dst, dststride, src, srcstride, W, height, \
+ mx, my, mcbuffer); \
+} \
+ \
+static void FUNC(put_hevc_qpel_h_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+ uint8_t *src, ptrdiff_t srcstride, \
+ int height, int mx, int my, \
+ int16_t *mcbuffer) \
+{ \
+ if (mx == 1) \
+ FUNC(put_hevc_qpel_h1)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else if (mx == 2) \
+ FUNC(put_hevc_qpel_h2)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else \
+ FUNC(put_hevc_qpel_h3)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+} \
+ \
+static void FUNC(put_hevc_qpel_v_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+ uint8_t *src, ptrdiff_t srcstride, \
+ int height, int mx, int my, \
+ int16_t *mcbuffer) \
+{ \
+ if (my == 1) \
+ FUNC(put_hevc_qpel_v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else if (my == 2) \
+ FUNC(put_hevc_qpel_v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else \
+ FUNC(put_hevc_qpel_v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+} \
+ \
+static void FUNC(put_hevc_qpel_hv_ ## W)(int16_t *dst, ptrdiff_t dststride, \
+ uint8_t *src, ptrdiff_t srcstride, \
+ int height, int mx, int my, \
+ int16_t *mcbuffer) \
+{ \
+ if (my == 1) { \
+ if (mx == 1) \
+ FUNC(put_hevc_qpel_h1v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else if (mx == 2) \
+ FUNC(put_hevc_qpel_h2v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else \
+ FUNC(put_hevc_qpel_h3v1)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ } else if (my == 2) { \
+ if (mx == 1) \
+ FUNC(put_hevc_qpel_h1v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else if (mx == 2) \
+ FUNC(put_hevc_qpel_h2v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else \
+ FUNC(put_hevc_qpel_h3v2)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ } else { \
+ if (mx == 1) \
+ FUNC(put_hevc_qpel_h1v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else if (mx == 2) \
+ FUNC(put_hevc_qpel_h2v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ else \
+ FUNC(put_hevc_qpel_h3v3)(dst, dststride, src, srcstride, W, height, mcbuffer); \
+ } \
+}
+
+QPEL(64)
+QPEL(48)
+QPEL(32)
+QPEL(24)
+QPEL(16)
+QPEL(12)
+QPEL(8)
+QPEL(4)
+
static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
uint8_t *_src, ptrdiff_t _srcstride,
int width, int height, int mx, int my,