summaryrefslogtreecommitdiff
path: root/libavcodec
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2014-07-27 00:19:25 +0200
committerMichael Niedermayer <michaelni@gmx.at>2014-07-27 00:20:48 +0200
commit706f81a2c27552fdc9d216834eedd36f5804c080 (patch)
tree46931fd1a7d52b2d6c815adc2a24dec1429893b3 /libavcodec
parent226b290f9f73d691ef1423d5c105f0bb0577d588 (diff)
parent1a880b2fb8456ce68eefe5902bac95fea1e6a72d (diff)
Merge commit '1a880b2fb8456ce68eefe5902bac95fea1e6a72d'
* commit '1a880b2fb8456ce68eefe5902bac95fea1e6a72d': hevc: SSE2 and SSSE3 loop filters Conflicts: libavcodec/hevcdsp.c libavcodec/hevcdsp.h libavcodec/x86/Makefile libavcodec/x86/hevc_deblock.asm libavcodec/x86/hevcdsp_init.c See: de7b89fd43f850d77cf24ad6ae50185dfe391e91 and several others Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/hevcdsp.c4
-rw-r--r--libavcodec/hevcdsp.h2
-rw-r--r--libavcodec/x86/hevc_deblock.asm12
-rw-r--r--libavcodec/x86/hevcdsp_init.c48
4 files changed, 35 insertions, 31 deletions
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 218c48f84c..3eae5419dd 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -247,5 +247,7 @@ int i = 0;
HEVC_DSP(8);
break;
}
- if (ARCH_X86) ff_hevcdsp_init_x86(hevcdsp, bit_depth);
+
+ if (ARCH_X86)
+ ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
}
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 03d4246ca0..c18bc865e9 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -123,6 +123,6 @@ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
extern const int8_t ff_hevc_epel_filters[7][4];
extern const int8_t ff_hevc_qpel_filters[3][16];
-void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
#endif /* AVCODEC_HEVCDSP_H */
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 7874e3318f..f7cd031493 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -660,7 +660,8 @@ ALIGN 16
INIT_XMM sse2
;-----------------------------------------------------------------------------
-; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q)
+; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
+; uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_v_loop_filter_chroma_8, 3, 5, 7, pix, stride, tc, pix0, r3stride
sub pixq, 2
@@ -693,7 +694,8 @@ cglobal hevc_v_loop_filter_chroma_12, 3, 5, 7, pix, stride, tc, pix0, r3stride
RET
;-----------------------------------------------------------------------------
-; void ff_hevc_h_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q
+; void ff_hevc_h_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
+; uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_h_loop_filter_chroma_8, 3, 4, 7, pix, stride, tc, pix0
mov pix0q, pixq
@@ -749,7 +751,8 @@ cglobal hevc_h_loop_filter_chroma_12, 3, 4, 7, pix, stride, tc, pix0
%if ARCH_X86_64
%macro LOOP_FILTER_LUMA 0
;-----------------------------------------------------------------------------
-; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
+; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
+; int *_tc, uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_v_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc
sub r0, 4
@@ -788,7 +791,8 @@ cglobal hevc_v_loop_filter_luma_12, 4, 15, 16, pix, stride, beta, tc
RET
;-----------------------------------------------------------------------------
-; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
+; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
+; int *_tc, uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0, src3stride
lea src3strideq, [3 * strideq]
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index fb3357bef6..d8c15ba695 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -28,21 +28,19 @@
#include "libavcodec/hevcdsp.h"
#include "libavcodec/x86/hevcdsp.h"
+#define LFC_FUNC(DIR, DEPTH, OPT) \
+void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q);
-#define LFC_FUNC(DIR, DEPTH, OPT) \
-void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
-
-#define LFL_FUNC(DIR, DEPTH, OPT) \
-void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int _beta, int *_tc, \
-uint8_t *_no_p, uint8_t *_no_q);
+#define LFL_FUNC(DIR, DEPTH, OPT) \
+void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q);
#define LFC_FUNCS(type, depth, opt) \
-LFC_FUNC(h, depth, opt) \
-LFC_FUNC(v, depth, opt)
+ LFC_FUNC(h, depth, opt) \
+ LFC_FUNC(v, depth, opt)
#define LFL_FUNCS(type, depth, opt) \
-LFL_FUNC(h, depth, opt) \
-LFL_FUNC(v, depth, opt)
+ LFL_FUNC(h, depth, opt) \
+ LFL_FUNC(v, depth, opt)
LFC_FUNCS(uint8_t, 8, sse2)
LFC_FUNCS(uint8_t, 10, sse2)
@@ -456,16 +454,16 @@ mc_bi_w_funcs(qpel_hv, 12, sse4);
PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
-void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
+void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
- int mm_flags = av_get_cpu_flags();
+ int cpu_flags = av_get_cpu_flags();
if (bit_depth == 8) {
- if (EXTERNAL_MMXEXT(mm_flags)) {
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
}
- if (EXTERNAL_SSE2(mm_flags)) {
+ if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
if (ARCH_X86_64) {
@@ -477,11 +475,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
}
- if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
+ if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
}
- if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
+ if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
@@ -493,16 +491,16 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
}
- if (EXTERNAL_AVX2(mm_flags)) {
+ if (EXTERNAL_AVX2(cpu_flags)) {
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
}
} else if (bit_depth == 10) {
- if (EXTERNAL_MMXEXT(mm_flags)) {
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
}
- if (EXTERNAL_SSE2(mm_flags)) {
+ if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
if (ARCH_X86_64) {
@@ -514,11 +512,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
}
- if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
+ if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
}
- if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
+ if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
@@ -529,13 +527,13 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
}
- if (EXTERNAL_AVX2(mm_flags)) {
+ if (EXTERNAL_AVX2(cpu_flags)) {
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
}
} else if (bit_depth == 12) {
- if (EXTERNAL_SSE2(mm_flags)) {
+ if (EXTERNAL_SSE2(cpu_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
if (ARCH_X86_64) {
@@ -543,11 +541,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
}
}
- if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
+ if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
}
- if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
+ if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);