summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/dsputil_mmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/dsputil_mmx.c')
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 32
1 files changed, 32 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index f98e6ae0fa..a0cb11aa40 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
+ if (!high_bit_depth) {
c->clear_block = clear_block_mmx;
c->clear_blocks = clear_blocks_mmx;
if ((mm_flags & AV_CPU_FLAG_SSE) &&
@@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->clear_block = clear_block_sse;
c->clear_blocks = clear_blocks_sse;
}
+ }
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
@@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
+ if (!high_bit_depth) {
SET_HPEL_FUNCS(put, 0, 16, mmx);
SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
SET_HPEL_FUNCS(avg, 0, 16, mmx);
@@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
SET_HPEL_FUNCS(avg, 1, 8, mmx);
SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
+ }
#if ARCH_X86_32 || !HAVE_YASM
c->gmc= gmc_mmx;
#endif
#if ARCH_X86_32 && HAVE_YASM
+ if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_mmx;
#endif
c->add_bytes= add_bytes_mmx;
c->add_bytes_l2= add_bytes_l2_mmx;
+ if (!high_bit_depth)
c->draw_edges = draw_edges_mmx;
if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
@@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#if HAVE_YASM
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
+ }
c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
@@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->prefetch = prefetch_mmx2;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
@@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+ }
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+ if (!high_bit_depth) {
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+ }
if (CONFIG_VP3_DECODER && HAVE_YASM) {
c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
@@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
+ if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+ }
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
@@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
+ if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
+ }
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
#endif
@@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
c->prefetch = prefetch_3dnow;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
@@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
}
+ }
if (CONFIG_VP3_DECODER
&& (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
@@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
+ if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+ }
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
@@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
#if HAVE_YASM
+ if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
+ }
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
@@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
H264_QPEL_FUNCS(0, 0, sse2);
+ }
}
if(mm_flags & AV_CPU_FLAG_SSE2){
+ if (!high_bit_depth) {
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
@@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, sse2);
H264_QPEL_FUNCS(3, 2, sse2);
H264_QPEL_FUNCS(3, 3, sse2);
+ }
}
#if HAVE_SSSE3
if(mm_flags & AV_CPU_FLAG_SSSE3){
+ if (!high_bit_depth) {
H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, ssse3);
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
+ }
c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
#if HAVE_YASM
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
+ }
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
@@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
+ if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_sse;
c->gmc= gmc_sse;
#endif