summaryrefslogtreecommitdiff
path: root/libavcodec/x86/h264dsp_init.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/h264dsp_init.c')
-rw-r--r--libavcodec/x86/h264dsp_init.c136
1 files changed, 103 insertions, 33 deletions
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 134d594ca9..0643b37362 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -78,6 +78,11 @@ IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
IDCT_ADD_REP_FUNC2(, 8, 10, avx)
+IDCT_ADD_REP_FUNC2(, 8_422, 8, mmx)
+
+IDCT_ADD_REP_FUNC2(, 8_422, 10, sse2)
+IDCT_ADD_REP_FUNC2(, 8_422, 10, avx)
+
void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul);
void ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul);
@@ -103,28 +108,37 @@ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \
int beta);
#define LF_FUNCS(type, depth) \
-LF_FUNC(h, chroma, depth, mmxext) \
-LF_IFUNC(h, chroma_intra, depth, mmxext) \
-LF_FUNC(v, chroma, depth, mmxext) \
-LF_IFUNC(v, chroma_intra, depth, mmxext) \
-LF_FUNC(h, luma, depth, mmxext) \
-LF_IFUNC(h, luma_intra, depth, mmxext) \
-LF_FUNC(h, luma, depth, sse2) \
-LF_IFUNC(h, luma_intra, depth, sse2) \
-LF_FUNC(v, luma, depth, sse2) \
-LF_IFUNC(v, luma_intra, depth, sse2) \
-LF_FUNC(h, chroma, depth, sse2) \
-LF_IFUNC(h, chroma_intra, depth, sse2) \
-LF_FUNC(v, chroma, depth, sse2) \
-LF_IFUNC(v, chroma_intra, depth, sse2) \
-LF_FUNC(h, luma, depth, avx) \
-LF_IFUNC(h, luma_intra, depth, avx) \
-LF_FUNC(v, luma, depth, avx) \
-LF_IFUNC(v, luma_intra, depth, avx) \
-LF_FUNC(h, chroma, depth, avx) \
-LF_IFUNC(h, chroma_intra, depth, avx) \
-LF_FUNC(v, chroma, depth, avx) \
-LF_IFUNC(v, chroma_intra, depth, avx)
+LF_FUNC(h, chroma, depth, mmxext) \
+LF_IFUNC(h, chroma_intra, depth, mmxext) \
+LF_FUNC(h, chroma422, depth, mmxext) \
+LF_IFUNC(h, chroma422_intra, depth, mmxext) \
+LF_FUNC(v, chroma, depth, mmxext) \
+LF_IFUNC(v, chroma_intra, depth, mmxext) \
+LF_FUNC(h, luma, depth, mmxext) \
+LF_IFUNC(h, luma_intra, depth, mmxext) \
+LF_FUNC(h, luma, depth, sse2) \
+LF_IFUNC(h, luma_intra, depth, sse2) \
+LF_FUNC(v, luma, depth, sse2) \
+LF_IFUNC(v, luma_intra, depth, sse2) \
+LF_FUNC(h, chroma, depth, sse2) \
+LF_IFUNC(h, chroma_intra, depth, sse2) \
+LF_FUNC(h, chroma422, depth, sse2) \
+LF_IFUNC(h, chroma422_intra, depth, sse2) \
+LF_FUNC(v, chroma, depth, sse2) \
+LF_IFUNC(v, chroma_intra, depth, sse2) \
+LF_FUNC(h, luma, depth, avx) \
+LF_IFUNC(h, luma_intra, depth, avx) \
+LF_FUNC(v, luma, depth, avx) \
+LF_IFUNC(v, luma_intra, depth, avx) \
+LF_FUNC(h, chroma, depth, avx) \
+LF_IFUNC(h, chroma_intra, depth, avx) \
+LF_FUNC(h, chroma422, depth, avx) \
+LF_IFUNC(h, chroma422_intra, depth, avx) \
+LF_FUNC(v, chroma, depth, avx) \
+LF_IFUNC(v, chroma_intra, depth, avx)
+
+LF_FUNC(h, luma_mbaff, 8, sse2)
+LF_FUNC(h, luma_mbaff, 8, avx)
LF_FUNCS(uint8_t, 8)
LF_FUNCS(uint16_t, 10)
@@ -155,13 +169,13 @@ LF_IFUNC(v, luma_intra, 10, mmxext)
/* weighted prediction */
#define H264_WEIGHT(W, OPT) \
-void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride, \
+void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, ptrdiff_t stride, \
int height, int log2_denom, \
int weight, int offset);
#define H264_BIWEIGHT(W, OPT) \
void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src, \
- int stride, int height, \
+ ptrdiff_t stride, int height, \
int log2_denom, int weightd, \
int weights, int offset);
@@ -181,7 +195,7 @@ H264_BIWEIGHT_MMX(4)
#define H264_WEIGHT_10(W, DEPTH, OPT) \
void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
- int stride, \
+ ptrdiff_t stride, \
int height, \
int log2_denom, \
int weight, \
@@ -190,7 +204,7 @@ void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
#define H264_BIWEIGHT_10(W, DEPTH, OPT) \
void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
uint8_t *src, \
- int stride, \
+ ptrdiff_t stride, \
int height, \
int log2_denom, \
int weightd, \
@@ -210,6 +224,7 @@ H264_BIWEIGHT_10_SSE(4, 10)
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc)
{
+#if HAVE_YASM
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMXEXT(cpu_flags) && chroma_format_idc <= 1)
@@ -224,8 +239,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
- if (chroma_format_idc <= 1)
+ if (chroma_format_idc <= 1) {
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
+ } else {
+ c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmx;
+ }
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
if (cpu_flags & AV_CPU_FLAG_CMOV)
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
@@ -244,6 +262,9 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
if (chroma_format_idc <= 1) {
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_mmxext;
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
+ } else {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
+ c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_mmxext;
}
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
c->h264_v_loop_filter_luma = deblock_v_luma_8_mmxext;
@@ -279,6 +300,20 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
+
+#if ARCH_X86_64
+ c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
+#endif
+
+ c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_sse2;
+ c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
+ if (chroma_format_idc <= 1) {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_sse2;
+ c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
+ } else {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_sse2;
+ c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
+ }
}
if (EXTERNAL_SSSE3(cpu_flags)) {
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
@@ -289,12 +324,30 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
+#if ARCH_X86_64
+ c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
+#endif
+
+ c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_avx;
+ c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_avx;
+ if (chroma_format_idc <= 1) {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_avx;
+ c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_avx;
+ } else {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_avx;
+ c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_avx;
+ }
}
} else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(cpu_flags)) {
#if ARCH_X86_32
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmxext;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
+ if (chroma_format_idc <= 1) {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_mmxext;
+ } else {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_mmxext;
+ }
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
@@ -307,8 +360,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
- if (chroma_format_idc <= 1)
+ if (chroma_format_idc <= 1) {
c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
+ } else {
+ c->h264_idct_add8 = ff_h264_idct_add8_422_10_sse2;
+ }
c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK
c->h264_idct8_add = ff_h264_idct8_add_10_sse2;
@@ -325,6 +381,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
+ if (chroma_format_idc <= 1) {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
+ } else {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
+ }
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
@@ -347,8 +408,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
- if (chroma_format_idc <= 1)
+ if (chroma_format_idc <= 1) {
c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
+ } else {
+ c->h264_idct_add8 = ff_h264_idct_add8_422_10_avx;
+ }
c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
c->h264_idct8_add = ff_h264_idct8_add_10_avx;
@@ -357,6 +421,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_avx;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
+ if (chroma_format_idc <= 1) {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
+ } else {
+ c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
+ }
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
@@ -365,4 +434,5 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
#endif /* HAVE_ALIGNED_STACK */
}
}
+#endif
}