summary | refs | log | tree | commit | diff
path: root/libavcodec/x86
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/Makefile 3
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 98
-rw-r--r-- libavcodec/x86/h264chroma_init.c 116
3 files changed, 118 insertions, 99 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 1feb0607d4..f72765af9b 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -43,7 +43,8 @@ YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o
-YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \
+YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264chroma_init.o \
+ x86/h264_chromamc.o \
x86/h264_chromamc_10bit.o
YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
x86/h264_deblock_10bit.o \
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 92f532a65e..39383863af 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -1460,49 +1460,6 @@ void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-#define CHROMA_MC(OP, NUM, DEPTH, OPT) \
-void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
- (uint8_t *dst, uint8_t *src, \
- int stride, int h, int x, int y);
-
-CHROMA_MC(put, 2, 10, mmxext)
-CHROMA_MC(avg, 2, 10, mmxext)
-CHROMA_MC(put, 4, 10, mmxext)
-CHROMA_MC(avg, 4, 10, mmxext)
-CHROMA_MC(put, 8, 10, sse2)
-CHROMA_MC(avg, 8, 10, sse2)
-CHROMA_MC(put, 8, 10, avx)
-CHROMA_MC(avg, 8, 10, avx)
-
#if HAVE_INLINE_ASM
/* CAVS-specific */
@@ -1704,11 +1661,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
- }
-
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
#endif
@@ -1773,19 +1725,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
}
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
- }
- if (bit_depth == 10 && CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
- }
-
/* slower than cmov version on AMD */
if (!(mm_flags & AV_CPU_FLAG_3DNOW))
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext;
@@ -1838,11 +1777,6 @@ static av_cold void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
}
-
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
- }
#endif /* HAVE_YASM */
}
@@ -1889,13 +1823,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
}
}
- if (bit_depth == 10) {
- if (CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
- }
- }
-
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
if (mm_flags & AV_CPU_FLAG_ATOM) {
@@ -1916,14 +1843,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
int mm_flags)
{
#if HAVE_SSSE3_EXTERNAL
- const int high_bit_depth = avctx->bits_per_raw_sample > 8;
-
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
- }
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
@@ -1946,20 +1865,6 @@ static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_SSE4_EXTERNAL */
}
-static av_cold void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
-{
-#if HAVE_AVX_EXTERNAL
- const int bit_depth = avctx->bits_per_raw_sample;
-
- if (bit_depth == 10) {
- if (CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
- }
- }
-#endif /* HAVE_AVX_EXTERNAL */
-}
-
av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
@@ -1990,9 +1895,6 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_SSE4)
dsputil_init_sse4(c, avctx, mm_flags);
- if (mm_flags & AV_CPU_FLAG_AVX)
- dsputil_init_avx(c, avctx, mm_flags);
-
if (CONFIG_ENCODERS)
ff_dsputilenc_init_mmx(c, avctx);
}
diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c
new file mode 100644
index 0000000000..f1f3a19257
--- /dev/null
+++ b/libavcodec/x86/h264chroma_init.c
@@ -0,0 +1,116 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/h264chroma.h"
+
+void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+#define CHROMA_MC(OP, NUM, DEPTH, OPT) \
+void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
+ (uint8_t *dst, uint8_t *src, \
+ int stride, int h, int x, int y);
+
+CHROMA_MC(put, 2, 10, mmxext)
+CHROMA_MC(avg, 2, 10, mmxext)
+CHROMA_MC(put, 4, 10, mmxext)
+CHROMA_MC(avg, 4, 10, mmxext)
+CHROMA_MC(put, 8, 10, sse2)
+CHROMA_MC(avg, 8, 10, sse2)
+CHROMA_MC(put, 8, 10, avx)
+CHROMA_MC(avg, 8, 10, avx)
+
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
+{ /* Install x86 chroma MC functions into c's put/avg tables, chosen by CPU flags and bit_depth. */
+ int high_bit_depth = bit_depth > 8; /* nonzero for any depth above 8 bits */
+ int mm_flags = av_get_cpu_flags(); /* CPU feature flags tested by the EXTERNAL_* checks below */
+ /* The blocks run in order, so a later matching block overwrites entries set by an earlier one. */
+ if (EXTERNAL_MMX(mm_flags) && !high_bit_depth) { /* depth <= 8: put mc8 -> [0], put mc4 -> [1] */
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
+ }
+
+ if (EXTERNAL_AMD3DNOW(mm_flags) && !high_bit_depth) { /* depth <= 8: avg mc8 -> [0], avg mc4 -> [1] */
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
+ }
+
+ if (EXTERNAL_MMXEXT(mm_flags) && !high_bit_depth) { /* depth <= 8: overwrites avg [0]/[1] (also set by the 3DNow! block), adds mc2 in [2] */
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
+ }
+
+ if (EXTERNAL_MMXEXT(mm_flags) && bit_depth == 10) { /* exactly 10-bit: mc2 -> [2], mc4 -> [1] */
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
+ }
+
+ if (EXTERNAL_SSE2(mm_flags) && bit_depth == 10) { /* exactly 10-bit: mc8 -> [0] */
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
+ }
+
+ if (EXTERNAL_SSSE3(mm_flags) && !high_bit_depth) { /* depth <= 8: overwrites put/avg [0] and [1]; [2] is not written here */
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
+ }
+
+ if (EXTERNAL_AVX(mm_flags) && bit_depth == 10) { /* exactly 10-bit: overwrites the mc8 entries in [0] set by the SSE2 block */
+ // AVX implies !cache64.
+ // TODO: Port cache(32|64) detection from x264.
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
+ }
+} /* a depth above 8 other than 10 matches no block, so c is left as the caller passed it */