summary | refs | log | tree | commit | diff
path: root/libavcodec/hevcdsp_template.c
diff options
context:
space:
mode:
author: Anton Khirnov <anton@khirnov.net> 2015-07-24 18:56:54 +0200
committer: Anton Khirnov <anton@khirnov.net> 2015-12-05 21:10:41 +0100
commit: 688417399c69aadd4c287bdb0dec82ef8799011c (patch)
tree: 872ef2840ecd6007bad27df007b0f97ee19c9ae0 /libavcodec/hevcdsp_template.c
parent: 818bfe7f0a3ff243deb63c4b146de2563f38ffd4 (diff)
hevcdsp: split the pred functions by width
This should allow for more efficient SIMD.
Diffstat (limited to 'libavcodec/hevcdsp_template.c')
-rw-r--r-- libavcodec/hevcdsp_template.c 81
1 files changed, 63 insertions, 18 deletions
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index d832904dcb..723f4d4520 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -1130,9 +1130,10 @@ EPEL(6)
EPEL(4)
EPEL(2)
-static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
- int16_t *src, ptrdiff_t srcstride,
- int width, int height)
+static av_always_inline void
+FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
+ int16_t *src, ptrdiff_t srcstride,
+ int width, int height)
{
int x, y;
pixel *dst = (pixel *)_dst;
@@ -1152,10 +1153,11 @@ static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
}
}
-static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
- int16_t *src1, int16_t *src2,
- ptrdiff_t srcstride,
- int width, int height)
+static av_always_inline void
+FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
+ int16_t *src1, int16_t *src2,
+ ptrdiff_t srcstride,
+ int width, int height)
{
int x, y;
pixel *dst = (pixel *)_dst;
@@ -1177,10 +1179,11 @@ static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
}
}
-static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
- uint8_t *_dst, ptrdiff_t _dststride,
- int16_t *src, ptrdiff_t srcstride,
- int width, int height)
+static av_always_inline void
+FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
+ uint8_t *_dst, ptrdiff_t _dststride,
+ int16_t *src, ptrdiff_t srcstride,
+ int width, int height)
{
int shift, log2Wd, wx, ox, x, y, offset;
pixel *dst = (pixel *)_dst;
@@ -1205,13 +1208,14 @@ static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
}
}
-static void FUNC(weighted_pred_avg)(uint8_t denom,
- int16_t wl0Flag, int16_t wl1Flag,
- int16_t ol0Flag, int16_t ol1Flag,
- uint8_t *_dst, ptrdiff_t _dststride,
- int16_t *src1, int16_t *src2,
- ptrdiff_t srcstride,
- int width, int height)
+static av_always_inline void
+FUNC(weighted_pred_avg)(uint8_t denom,
+ int16_t wl0Flag, int16_t wl1Flag,
+ int16_t ol0Flag, int16_t ol1Flag,
+ uint8_t *_dst, ptrdiff_t _dststride,
+ int16_t *src1, int16_t *src2,
+ ptrdiff_t srcstride,
+ int width, int height)
{
int shift, log2Wd, w0, w1, o0, o1, x, y;
pixel *dst = (pixel *)_dst;
@@ -1234,6 +1238,47 @@ static void FUNC(weighted_pred_avg)(uint8_t denom,
}
}
+#define PUT_PRED(w) \
+static void FUNC(put_unweighted_pred_ ## w)(uint8_t *dst, ptrdiff_t dststride, \
+ int16_t *src, ptrdiff_t srcstride, \
+ int height) \
+{ \
+ FUNC(put_unweighted_pred)(dst, dststride, src, srcstride, w, height); \
+} \
+static void FUNC(put_unweighted_pred_avg_ ## w)(uint8_t *dst, ptrdiff_t dststride, \
+ int16_t *src1, int16_t *src2, \
+ ptrdiff_t srcstride, int height) \
+{ \
+ FUNC(put_unweighted_pred_avg)(dst, dststride, src1, src2, srcstride, w, height); \
+} \
+static void FUNC(put_weighted_pred_ ## w)(uint8_t denom, int16_t weight, int16_t offset, \
+ uint8_t *dst, ptrdiff_t dststride, \
+ int16_t *src, ptrdiff_t srcstride, int height) \
+{ \
+ FUNC(weighted_pred)(denom, weight, offset, \
+ dst, dststride, src, srcstride, w, height); \
+} \
+static void FUNC(put_weighted_pred_avg_ ## w)(uint8_t denom, int16_t weight0, int16_t weight1, \
+ int16_t offset0, int16_t offset1, \
+ uint8_t *dst, ptrdiff_t dststride, \
+ int16_t *src1, int16_t *src2, \
+ ptrdiff_t srcstride, int height) \
+{ \
+ FUNC(weighted_pred_avg)(denom, weight0, weight1, offset0, offset1, \
+ dst, dststride, src1, src2, srcstride, w, height); \
+}
+
+PUT_PRED(64)
+PUT_PRED(48)
+PUT_PRED(32)
+PUT_PRED(24)
+PUT_PRED(16)
+PUT_PRED(12)
+PUT_PRED(8)
+PUT_PRED(6)
+PUT_PRED(4)
+PUT_PRED(2)
+
// line zero
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]