summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/hevcdsp_init.c
diff options
context:
space:
mode:
author: Clément Bœsch <u@pkh.me> 2017-03-23 15:39:16 +0100
committer: Clément Bœsch <u@pkh.me> 2017-03-23 15:58:46 +0100
commit: 947230837cb6d64323590650554dad7abaf9a93f (patch)
tree: 7829be401e06ac6d5697815d33e245858a2b9a98 /libavcodec/x86/hevcdsp_init.c
parent: 05510ec0677620ab2251f3985f288ad69cdb193b (diff)
parent: 112cee0241f5799edff0e4682b9e8639b046dc78 (diff)
Merge commit '112cee0241f5799edff0e4682b9e8639b046dc78'
* commit '112cee0241f5799edff0e4682b9e8639b046dc78':
  hevc: Add SSE2 and AVX IDCT

Merged-by: Clément Bœsch <u@pkh.me>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- libavcodec/x86/hevcdsp_init.c | 53
1 files changed, 45 insertions, 8 deletions
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index c4d9564173..0b17671d46 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -58,18 +58,31 @@ LFL_FUNCS(uint8_t, 8, avx)
LFL_FUNCS(uint8_t, 10, avx)
LFL_FUNCS(uint8_t, 12, avx)
-#define IDCT_FUNCS(W, opt) \
+#define IDCT_DC_FUNCS(W, opt) \
void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
-IDCT_FUNCS(4x4, mmxext);
-IDCT_FUNCS(8x8, mmxext);
-IDCT_FUNCS(8x8, sse2);
-IDCT_FUNCS(16x16, sse2);
-IDCT_FUNCS(32x32, sse2);
-IDCT_FUNCS(16x16, avx2);
-IDCT_FUNCS(32x32, avx2);
+IDCT_DC_FUNCS(4x4, mmxext);
+IDCT_DC_FUNCS(8x8, mmxext);
+IDCT_DC_FUNCS(8x8, sse2);
+IDCT_DC_FUNCS(16x16, sse2);
+IDCT_DC_FUNCS(32x32, sse2);
+IDCT_DC_FUNCS(16x16, avx2);
+IDCT_DC_FUNCS(32x32, avx2);
+
+#define IDCT_FUNCS(opt) \
+void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
+
+IDCT_FUNCS(sse2)
+IDCT_FUNCS(avx)
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
@@ -709,6 +722,8 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
+ c->idct[2] = ff_hevc_idct_16x16_8_sse2;
+ c->idct[3] = ff_hevc_idct_32x32_8_sse2;
}
SAO_BAND_INIT(8, sse2);
@@ -716,6 +731,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
+ c->idct[0] = ff_hevc_idct_4x4_8_sse2;
+ c->idct[1] = ff_hevc_idct_8x8_8_sse2;
+
c->add_residual[1] = ff_hevc_add_residual8_8_sse2;
c->add_residual[2] = ff_hevc_add_residual16_8_sse2;
c->add_residual[3] = ff_hevc_add_residual32_8_sse2;
@@ -745,9 +763,15 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
+
+ c->idct[2] = ff_hevc_idct_16x16_8_avx;
+ c->idct[3] = ff_hevc_idct_32x32_8_avx;
}
SAO_BAND_INIT(8, avx);
+ c->idct[0] = ff_hevc_idct_4x4_8_avx;
+ c->idct[1] = ff_hevc_idct_8x8_8_avx;
+
c->add_residual[1] = ff_hevc_add_residual8_8_avx;
c->add_residual[2] = ff_hevc_add_residual16_8_avx;
c->add_residual[3] = ff_hevc_add_residual32_8_avx;
@@ -864,6 +888,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
+
+ c->idct[2] = ff_hevc_idct_16x16_10_sse2;
+ c->idct[3] = ff_hevc_idct_32x32_10_sse2;
}
SAO_BAND_INIT(10, sse2);
SAO_EDGE_INIT(10, sse2);
@@ -872,6 +899,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
+ c->idct[0] = ff_hevc_idct_4x4_10_sse2;
+ c->idct[1] = ff_hevc_idct_8x8_10_sse2;
+
c->add_residual[1] = ff_hevc_add_residual8_10_sse2;
c->add_residual[2] = ff_hevc_add_residual16_10_sse2;
c->add_residual[3] = ff_hevc_add_residual32_10_sse2;
@@ -897,7 +927,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
+
+ c->idct[2] = ff_hevc_idct_16x16_10_avx;
+ c->idct[3] = ff_hevc_idct_32x32_10_avx;
}
+
+ c->idct[0] = ff_hevc_idct_4x4_10_avx;
+ c->idct[1] = ff_hevc_idct_8x8_10_avx;
+
SAO_BAND_INIT(10, avx);
}
if (EXTERNAL_AVX2(cpu_flags)) {