summary | refs | log | tree | commit | diff
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: Clément Bœsch <cboesch@gopro.com> 2017-01-31 11:20:54 +0100
committer: Clément Bœsch <cboesch@gopro.com> 2017-01-31 15:31:34 +0100
commit: d0e132bab68073588dc55844a31b053fb0ee1c83 (patch)
tree: 1c3b3cd8bee776019f2960cfca1d561019cb84d4 /libavcodec/x86
parent: c778a9657dc8a425ac65f89d81d64709f2090a9f (diff)
parent: 1bd890ad173d79e7906c5e1d06bf0a06cca4519d (diff)
Merge commit '1bd890ad173d79e7906c5e1d06bf0a06cca4519d'
* commit '1bd890ad173d79e7906c5e1d06bf0a06cca4519d':
  hevc: Separate adding residual to prediction from IDCT

This commit should be a noop but isn't because of the following renames:
- transform_add → add_residual
- transform_skip → dequant
- idct_4x4_luma → transform_4x4_luma

Merged-by: Clément Bœsch <cboesch@gopro.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/hevc_res_add.asm 36
-rw-r--r-- libavcodec/x86/hevcdsp.h 28
-rw-r--r-- libavcodec/x86/hevcdsp_init.c 28
3 files changed, 46 insertions, 46 deletions
diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm
index dc3e88a373..869288faf3 100644
--- a/libavcodec/x86/hevc_res_add.asm
+++ b/libavcodec/x86/hevc_res_add.asm
@@ -1,5 +1,5 @@
; /*
-; * Provide SIMD optimizations for transform_add functions for HEVC decoding
+; * Provide SIMD optimizations for add_residual functions for HEVC decoding
; * Copyright (c) 2014 Pierre-Edouard LEPERE
; *
; * This file is part of FFmpeg.
@@ -52,7 +52,7 @@ cextern pw_1023
INIT_MMX mmxext
; void ff_hevc_tranform_add_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add4_8, 3, 4, 6
+cglobal hevc_add_residual4_8, 3, 4, 6
TR_ADD_MMX_4_8
add r1, 16
lea r0, [r0+r2*2]
@@ -135,8 +135,8 @@ cglobal hevc_transform_add4_8, 3, 4, 6
%macro TRANSFORM_ADD_8 0
-; void ff_hevc_transform_add8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add8_8, 3, 4, 8
+; void ff_hevc_add_residual8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual8_8, 3, 4, 8
lea r3, [r2*3]
TR_ADD_SSE_8_8
add r1, 64
@@ -144,8 +144,8 @@ cglobal hevc_transform_add8_8, 3, 4, 8
TR_ADD_SSE_8_8
RET
-; void ff_hevc_transform_add16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add16_8, 3, 4, 7
+; void ff_hevc_add_residual16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual16_8, 3, 4, 7
pxor m0, m0
lea r3, [r2*3]
TR_ADD_SSE_16_32_8 0, r0, r0+r2
@@ -158,8 +158,8 @@ cglobal hevc_transform_add16_8, 3, 4, 7
%endrep
RET
-; void ff_hevc_transform_add32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add32_8, 3, 4, 7
+; void ff_hevc_add_residual32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual32_8, 3, 4, 7
pxor m0, m0
TR_ADD_SSE_16_32_8 0, r0, r0+16
TR_ADD_SSE_16_32_8 64, r0+r2, r0+r2+16
@@ -179,8 +179,8 @@ TRANSFORM_ADD_8
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
-; void ff_hevc_transform_add32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-cglobal hevc_transform_add32_8, 3, 4, 7
+; void ff_hevc_add_residual32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+cglobal hevc_add_residual32_8, 3, 4, 7
pxor m0, m0
lea r3, [r2*3]
TR_ADD_SSE_16_32_8 0, r0, r0+r2
@@ -195,7 +195,7 @@ cglobal hevc_transform_add32_8, 3, 4, 7
%endif
;-----------------------------------------------------------------------------
-; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride)
+; void ff_hevc_add_residual_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
%macro TR_ADD_SSE_8_10 4
mova m0, [%4]
@@ -310,7 +310,7 @@ cglobal hevc_transform_add32_8, 3, 4, 7
INIT_MMX mmxext
-cglobal hevc_transform_add4_10,3,4, 6
+cglobal hevc_add_residual4_10,3,4, 6
pxor m2, m2
mova m3, [max_pixels_10]
TR_ADD_MMX4_10 r0, r2, r1
@@ -320,10 +320,10 @@ cglobal hevc_transform_add4_10,3,4, 6
RET
;-----------------------------------------------------------------------------
-; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride)
+; void ff_hevc_add_residual_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
INIT_XMM sse2
-cglobal hevc_transform_add8_10,3,4,6
+cglobal hevc_add_residual8_10,3,4,6
pxor m4, m4
mova m5, [max_pixels_10]
lea r3, [r2*3]
@@ -334,7 +334,7 @@ cglobal hevc_transform_add8_10,3,4,6
TR_ADD_SSE_8_10 r0, r2, r3, r1
RET
-cglobal hevc_transform_add16_10,3,4,6
+cglobal hevc_add_residual16_10,3,4,6
pxor m4, m4
mova m5, [max_pixels_10]
@@ -346,7 +346,7 @@ cglobal hevc_transform_add16_10,3,4,6
%endrep
RET
-cglobal hevc_transform_add32_10,3,4,6
+cglobal hevc_add_residual32_10,3,4,6
pxor m4, m4
mova m5, [max_pixels_10]
@@ -361,7 +361,7 @@ cglobal hevc_transform_add32_10,3,4,6
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
-cglobal hevc_transform_add16_10,3,4,6
+cglobal hevc_add_residual16_10,3,4,6
pxor m4, m4
mova m5, [max_pixels_10]
lea r3, [r2*3]
@@ -374,7 +374,7 @@ cglobal hevc_transform_add16_10,3,4,6
%endrep
RET
-cglobal hevc_transform_add32_10,3,4,6
+cglobal hevc_add_residual32_10,3,4,6
pxor m4, m4
mova m5, [max_pixels_10]
diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h
index ad8168fb5b..3cfdc272cf 100644
--- a/libavcodec/x86/hevcdsp.h
+++ b/libavcodec/x86/hevcdsp.h
@@ -239,23 +239,23 @@ WEIGHTING_PROTOTYPES(12, sse4);
///////////////////////////////////////////////////////////////////////////////
// TRANSFORM_ADD
///////////////////////////////////////////////////////////////////////////////
-void ff_hevc_transform_add4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add8_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add16_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual8_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add16_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-void ff_hevc_transform_add32_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual16_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual32_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
#endif // AVCODEC_X86_HEVCDSP_H
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 09eb06d0c9..da73d76638 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -700,7 +700,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
- c->transform_add[0] = ff_hevc_transform_add4_8_mmxext;
+ c->add_residual[0] = ff_hevc_add_residual4_8_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
@@ -716,9 +716,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
- c->transform_add[1] = ff_hevc_transform_add8_8_sse2;
- c->transform_add[2] = ff_hevc_transform_add16_8_sse2;
- c->transform_add[3] = ff_hevc_transform_add32_8_sse2;
+ c->add_residual[1] = ff_hevc_add_residual8_8_sse2;
+ c->add_residual[2] = ff_hevc_add_residual16_8_sse2;
+ c->add_residual[3] = ff_hevc_add_residual32_8_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {
if(ARCH_X86_64) {
@@ -748,9 +748,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
}
SAO_BAND_INIT(8, avx);
- c->transform_add[1] = ff_hevc_transform_add8_8_avx;
- c->transform_add[2] = ff_hevc_transform_add16_8_avx;
- c->transform_add[3] = ff_hevc_transform_add32_8_avx;
+ c->add_residual[1] = ff_hevc_add_residual8_8_avx;
+ c->add_residual[2] = ff_hevc_add_residual16_8_avx;
+ c->add_residual[3] = ff_hevc_add_residual32_8_avx;
}
if (EXTERNAL_AVX2(cpu_flags)) {
c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
@@ -850,11 +850,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
- c->transform_add[3] = ff_hevc_transform_add32_8_avx2;
+ c->add_residual[3] = ff_hevc_add_residual32_8_avx2;
}
} else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(cpu_flags)) {
- c->transform_add[0] = ff_hevc_transform_add4_10_mmxext;
+ c->add_residual[0] = ff_hevc_add_residual4_10_mmxext;
c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
}
@@ -872,9 +872,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
- c->transform_add[1] = ff_hevc_transform_add8_10_sse2;
- c->transform_add[2] = ff_hevc_transform_add16_10_sse2;
- c->transform_add[3] = ff_hevc_transform_add32_10_sse2;
+ c->add_residual[1] = ff_hevc_add_residual8_10_sse2;
+ c->add_residual[2] = ff_hevc_add_residual16_10_sse2;
+ c->add_residual[3] = ff_hevc_add_residual32_10_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
@@ -1053,8 +1053,8 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
SAO_BAND_INIT(10, avx2);
SAO_EDGE_INIT(10, avx2);
- c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
- c->transform_add[3] = ff_hevc_transform_add32_10_avx2;
+ c->add_residual[2] = ff_hevc_add_residual16_10_avx2;
+ c->add_residual[3] = ff_hevc_add_residual32_10_avx2;
}
} else if (bit_depth == 12) {