34 files changed, 753 insertions, 365 deletions
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 610f92acbb..32bb0fc932 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -270,6 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
 
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
     c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
     c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
@@ -311,6 +314,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
     c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
 
     c->clear_blocks = clear_blocks_axp;
+    }
 
     /* amask clears all bits that correspond to present features.  */
     if (amask(AMASK_MVI) == 0) {
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 92796c3145..777a2f954e 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -75,6 +75,8 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
 
 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     ff_put_pixels_clamped = c->put_pixels_clamped;
     ff_add_pixels_clamped = c->add_pixels_clamped;
 
@@ -95,6 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
 
     c->add_pixels_clamped = ff_add_pixels_clamped_arm;
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
@@ -112,6 +115,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
     c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
     c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
     c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
+    }
 
     if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
     if (HAVE_ARMV6)   ff_dsputil_init_armv6(c, avctx);
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 362050c6ea..7584aeefc6 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -72,6 +72,8 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
 
 void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO ||
                            avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
         c->idct_put              = ff_simple_idct_put_armv6;
@@ -80,6 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
         c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
     }
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
@@ -100,6 +103,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
 
     c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
+    }
 
     c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
     c->get_pixels = ff_get_pixels_armv6;
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index cd58011a2c..3bc053c864 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -173,6 +173,8 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
 
 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     if (!avctx->lowres) {
         if (avctx->idct_algo == FF_IDCT_AUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLENEON) {
@@ -190,6 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
         }
     }
 
+    if (!high_bit_depth) {
     c->clear_block  = ff_clear_block_neon;
     c->clear_blocks = ff_clear_blocks_neon;
 
@@ -213,12 +216,14 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 
     c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
+    }
 
     c->add_pixels_clamped = ff_add_pixels_clamped_neon;
     c->put_pixels_clamped = ff_put_pixels_clamped_neon;
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
 
     if (CONFIG_H264_DECODER) {
+        if (!high_bit_depth) {
         c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
         c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
         c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
@@ -294,6 +299,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
         c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
         c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
         c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
+        }
     }
 
     if (CONFIG_VP3_DECODER) {
diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
index e83edb5bb8..86f8fddfcf 100644
--- a/libavcodec/arm/dsputil_iwmmxt.c
+++ b/libavcodec/arm/dsputil_iwmmxt.c
@@ -155,6 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
     if (avctx->dsp_mask) {
         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -167,6 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
 
     c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
 
+    if (!high_bit_depth) {
     c->clear_blocks = clear_blocks_iwmmxt;
 
     c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
@@ -204,4 +206,5 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
     c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
     c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
     c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
+    }
 }
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 20f5ac2076..c2399e50ff 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -92,8 +92,9 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
                              DCTELEM *block, int stride,
                              const uint8_t nnzc[6*8]);
 
-static void ff_h264dsp_init_neon(H264DSPContext *c)
+static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
 {
+    if (bit_depth == 8) {
     c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
     c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
     c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
@@ -125,9 +126,10 @@ static void ff_h264dsp_init_neon(H264DSPContext *c)
     c->h264_idct8_add       = ff_h264_idct8_add_neon;
     c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
     c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
+    }
 }
 
-void ff_h264dsp_init_arm(H264DSPContext *c)
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
 {
-    if (HAVE_NEON) ff_h264dsp_init_neon(c);
+    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
 }
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index 3f1c5c64fd..cae32d7567 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -42,8 +42,13 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
 void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
 void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
 
-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
 {
+    const int high_depth = bit_depth > 8;
+
+    if (high_depth)
+        return;
+
     h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
     h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
     if (codec_id != CODEC_ID_VP8)
@@ -69,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
         h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
 }
 
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id)
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, bit_depth)
 {
-    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id);
+    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth);
 }
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index 65d0308018..0db2d8baf8 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -197,11 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
 
 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     c->get_pixels         = ff_bfin_get_pixels;
     c->diff_pixels        = ff_bfin_diff_pixels;
     c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
     c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
 
+    if (!high_bit_depth)
     c->clear_blocks       = bfin_clear_blocks;
     c->pix_sum            = ff_bfin_pix_sum;
     c->pix_norm1          = ff_bfin_pix_norm1;
@@ -228,6 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
     c->sse[1] = ff_bfin_sse8;
     c->sse[2] = ff_bfin_sse4;
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = bfin_put_pixels16;
     c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
     c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
@@ -247,6 +251,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
     c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
     c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
 /*     c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
+    }
 
     if (avctx->dct_algo == FF_DCT_AUTO)
         c->fdct               = ff_bfin_fdct;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 078e172d00..4389289d82 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -43,6 +43,15 @@
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
 
+#define BIT_DEPTH 9
+#include "dsputil_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "dsputil_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 8
 #include "dsputil_template.c"
 
 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
@@ -619,10 +628,10 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
 
 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     switch(width){
-    case 2: put_pixels2_c (dst, src, stride, height); break;
-    case 4: put_pixels4_c (dst, src, stride, height); break;
-    case 8: put_pixels8_c (dst, src, stride, height); break;
-    case 16:put_pixels16_c(dst, src, stride, height); break;
+    case 2: put_pixels2_8_c (dst, src, stride, height); break;
+    case 4: put_pixels4_8_c (dst, src, stride, height); break;
+    case 8: put_pixels8_8_c (dst, src, stride, height); break;
+    case 16:put_pixels16_8_c(dst, src, stride, height); break;
     }
 }
 
@@ -716,10 +725,10 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
 
 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     switch(width){
-    case 2: avg_pixels2_c (dst, src, stride, height); break;
-    case 4: avg_pixels4_c (dst, src, stride, height); break;
-    case 8: avg_pixels8_c (dst, src, stride, height); break;
-    case 16:avg_pixels16_c(dst, src, stride, height); break;
+    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
+    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
+    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
+    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
     }
 }
 
@@ -953,7 +962,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst
 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
-    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
 }\
 \
 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -963,7 +972,7 @@ static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
-    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
 }\
 \
 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -971,7 +980,7 @@ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
-    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
 }\
 \
 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -985,7 +994,7 @@ static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
-    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -996,7 +1005,7 @@ void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1004,9 +1013,9 @@ static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1017,7 +1026,7 @@ void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1025,9 +1034,9 @@ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1038,7 +1047,7 @@ void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1046,9 +1055,9 @@ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1059,7 +1068,7 @@ void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1067,23 +1076,23 @@ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[72];\
     uint8_t halfHV[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[72];\
     uint8_t halfHV[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1094,14 +1103,14 @@ void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
     uint8_t halfH[72];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1113,14 +1122,14 @@ void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
     uint8_t halfH[72];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
 }\
 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1132,7 +1141,7 @@ static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
-    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
 }\
 \
 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1142,7 +1151,7 @@ static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
-    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
 }\
 \
 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1150,7 +1159,7 @@ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
-    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
 }\
 \
 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1164,7 +1173,7 @@ static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
-    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1175,7 +1184,7 @@ void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1183,9 +1192,9 @@ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1196,7 +1205,7 @@ void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1204,9 +1213,9 @@ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1217,7 +1226,7 @@ void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1225,9 +1234,9 @@ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1238,7 +1247,7 @@ void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1246,23 +1255,23 @@ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[272];\
     uint8_t halfHV[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[272];\
     uint8_t halfHV[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1273,14 +1282,14 @@ void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
     uint8_t halfH[272];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1292,14 +1301,14 @@ void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
     uint8_t halfH[272];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
 }\
 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1327,7 +1336,7 @@ QPEL_MC(0, avg_       , _       , op_avg)
 #define put_qpel16_mc00_c ff_put_pixels16x16_c
 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
 #define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
-#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
+#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
 
 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -1349,16 +1358,16 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
 
 #if CONFIG_RV40_DECODER
 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    put_pixels16_xy2_c(dst, src, stride, 16);
+    put_pixels16_xy2_8_c(dst, src, stride, 16);
 }
 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    avg_pixels16_xy2_c(dst, src, stride, 16);
+    avg_pixels16_xy2_8_c(dst, src, stride, 16);
 }
 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    put_pixels8_xy2_c(dst, src, stride, 8);
+    put_pixels8_xy2_8_c(dst, src, stride, 8);
 }
 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    avg_pixels8_xy2_c(dst, src, stride, 8);
+    avg_pixels8_xy2_8_c(dst, src, stride, 8);
 }
 #endif /* CONFIG_RV40_DECODER */
 
@@ -1394,7 +1403,7 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t half[64];
     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
-    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
 }
 
 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
@@ -1404,7 +1413,7 @@ static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t half[64];
     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
-    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
 }
 
 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
@@ -1418,7 +1427,7 @@ static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
     wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
-    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
 }
 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t halfH[88];
@@ -1427,7 +1436,7 @@ static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
     wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
-    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
 }
 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t halfH[88];
@@ -2863,8 +2872,24 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
             c->idct_put= ff_jref_idct4_put;
             c->idct_add= ff_jref_idct4_add;
         }else{
-            c->idct_put= ff_h264_lowres_idct_put_c;
-            c->idct_add= ff_h264_lowres_idct_add_c;
+            if (avctx->codec_id != CODEC_ID_H264) {
+                c->idct_put= ff_h264_lowres_idct_put_8_c;
+                c->idct_add= ff_h264_lowres_idct_add_8_c;
+            } else {
+                switch (avctx->bits_per_raw_sample) {
+                    case 9:
+                        c->idct_put= ff_h264_lowres_idct_put_9_c;
+                        c->idct_add= ff_h264_lowres_idct_add_9_c;
+                        break;
+                    case 10:
+                        c->idct_put= ff_h264_lowres_idct_put_10_c;
+                        c->idct_add= ff_h264_lowres_idct_add_10_c;
+                        break;
+                    default:
+                        c->idct_put= ff_h264_lowres_idct_put_8_c;
+                        c->idct_add= ff_h264_lowres_idct_add_8_c;
+                }
+            }
         }
         c->idct    = j_rev_dct4;
         c->idct_permutation_type= FF_NO_IDCT_PERM;
@@ -2922,14 +2947,9 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
     c->put_pixels_nonclamped = put_pixels_nonclamped_c;
     c->add_pixels_clamped = ff_add_pixels_clamped_c;
-    c->add_pixels8 = add_pixels8_c;
-    c->add_pixels4 = add_pixels4_c;
     c->sum_abs_dctelem = sum_abs_dctelem_c;
-    c->emulated_edge_mc = ff_emulated_edge_mc;
     c->gmc1 = gmc1_c;
     c->gmc = ff_gmc_c;
-    c->clear_block = clear_block_c;
-    c->clear_blocks = clear_blocks_c;
     c->pix_sum = pix_sum_c;
     c->pix_norm1 = pix_norm1_c;
 
@@ -2947,30 +2967,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->pix_abs[1][2] = pix_abs8_y2_c;
     c->pix_abs[1][3] = pix_abs8_xy2_c;
 
-#define dspfunc(PFX, IDX, NUM) \
-    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
-    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
-    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
-    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
-
-    dspfunc(put, 0, 16);
-    dspfunc(put_no_rnd, 0, 16);
-    dspfunc(put, 1, 8);
-    dspfunc(put_no_rnd, 1, 8);
-    dspfunc(put, 2, 4);
-    dspfunc(put, 3, 2);
-
-    dspfunc(avg, 0, 16);
-    dspfunc(avg_no_rnd, 0, 16);
-    dspfunc(avg, 1, 8);
-    dspfunc(avg_no_rnd, 1, 8);
-    dspfunc(avg, 2, 4);
-    dspfunc(avg, 3, 2);
-#undef dspfunc
-
-    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
-    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
-
     c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
     c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
     c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
@@ -3021,23 +3017,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     dspfunc(avg_qpel, 1, 8);
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
 
-    dspfunc(put_h264_qpel, 0, 16);
-    dspfunc(put_h264_qpel, 1, 8);
-    dspfunc(put_h264_qpel, 2, 4);
-    dspfunc(put_h264_qpel, 3, 2);
-    dspfunc(avg_h264_qpel, 0, 16);
-    dspfunc(avg_h264_qpel, 1, 8);
-    dspfunc(avg_h264_qpel, 2, 4);
-
 #undef dspfunc
-    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
-    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
-    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
-    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
-    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
-    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
-
-    c->draw_edges = draw_edges_c;
 
 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
     ff_mlp_init(c, avctx);
@@ -3162,6 +3142,92 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
     memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
 
+#undef FUNC
+#undef FUNCC
+#define FUNC(f, depth) f ## _ ## depth
+#define FUNCC(f, depth) f ## _ ## depth ## _c
+
+#define dspfunc1(PFX, IDX, NUM, depth)\
+    c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
+    c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
+    c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
+    c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
+
+#define dspfunc2(PFX, IDX, NUM, depth)\
+    c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
+    c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
+    c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
+    c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
+    c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
+    c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
+    c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
+    c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
+    c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
+    c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
+    c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
+    c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
+    c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
+    c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
+    c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
+    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
+
+
+#define BIT_DEPTH_FUNCS(depth)\
+    c->draw_edges                    = FUNCC(draw_edges            , depth);\
+    c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
+    c->clear_block                   = FUNCC(clear_block           , depth);\
+    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
+    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
+    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
+    c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
+    c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
+\
+    c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
+    c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
+    c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
+    c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
+    c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
+    c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
+\
+    dspfunc1(put       , 0, 16, depth);\
+    dspfunc1(put       , 1,  8, depth);\
+    dspfunc1(put       , 2,  4, depth);\
+    dspfunc1(put       , 3,  2, depth);\
+    dspfunc1(put_no_rnd, 0, 16, depth);\
+    dspfunc1(put_no_rnd, 1,  8, depth);\
+    dspfunc1(avg       , 0, 16, depth);\
+    dspfunc1(avg       , 1,  8, depth);\
+    dspfunc1(avg       , 2,  4, depth);\
+    dspfunc1(avg       , 3,  2, depth);\
+    dspfunc1(avg_no_rnd, 0, 16, depth);\
+    dspfunc1(avg_no_rnd, 1,  8, depth);\
+\
+    dspfunc2(put_h264_qpel, 0, 16, depth);\
+    dspfunc2(put_h264_qpel, 1,  8, depth);\
+    dspfunc2(put_h264_qpel, 2,  4, depth);\
+    dspfunc2(put_h264_qpel, 3,  2, depth);\
+    dspfunc2(avg_h264_qpel, 0, 16, depth);\
+    dspfunc2(avg_h264_qpel, 1,  8, depth);\
+    dspfunc2(avg_h264_qpel, 2,  4, depth);
+
+    if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) {
+        BIT_DEPTH_FUNCS(8)
+    } else {
+        switch (avctx->bits_per_raw_sample) {
+            case 9:
+                BIT_DEPTH_FUNCS(9)
+                break;
+            case 10:
+                BIT_DEPTH_FUNCS(10)
+                break;
+            default:
+                av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
+                BIT_DEPTH_FUNCS(8)
+                break;
+        }
+    }
+
+
     if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
     if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
     if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 9b4aef758a..78d21527a5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -53,19 +53,24 @@ void ff_fdct_mmx(DCTELEM *block);
 void ff_fdct_mmx2(DCTELEM *block);
 void ff_fdct_sse2(DCTELEM *block);
 
-void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
-void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
-void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-
-void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul);
-void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
+#define H264_IDCT(depth) \
+void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_lowres_idct_add_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
+void ff_h264_lowres_idct_put_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
+void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
+void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
+
+H264_IDCT( 8)
+H264_IDCT( 9)
+H264_IDCT(10)
+
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
 
@@ -82,10 +87,20 @@ extern const uint8_t ff_zigzag248_direct[64];
 extern uint32_t ff_squareTbl[512];
 extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
 
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
+#define PUTAVG_PIXELS(depth)\
+void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
+
+PUTAVG_PIXELS( 8)
+PUTAVG_PIXELS( 9)
+PUTAVG_PIXELS(10)
+
+#define ff_put_pixels8x8_c ff_put_pixels8x8_8_c
+#define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c
+#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
+#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
 
 /* VP3 DSP functions */
 void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
@@ -187,10 +202,17 @@ typedef struct ScanTable{
 
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
 
-void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize,
-                         int block_w, int block_h,
+#define EMULATED_EDGE(depth) \
+void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
+                         int block_w, int block_h,\
                          int src_x, int src_y, int w, int h);
 
+EMULATED_EDGE(8)
+EMULATED_EDGE(9)
+EMULATED_EDGE(10)
+
+#define ff_emulated_edge_mc ff_emulated_edge_mc_8
+
 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
 void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
@@ -562,6 +584,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
 
 #define         BYTE_VEC32(c)   ((c)*0x01010101UL)
+#define         BYTE_VEC64(c)   ((c)*0x0001000100010001UL)
 
 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
 {
@@ -573,6 +596,16 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
     return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
 }
 
+static inline uint64_t rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+
+static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+
 static inline int get_penalty_factor(int lambda, int lambda2, int type){
     switch(type&0xFF){
     default:
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index f69c4671f9..8ca6d3e414 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -27,25 +27,55 @@
  * DSP utils
  */
 
-#include "dsputil.h"
+#include "high_bit_depth.h"
 
-#define BIT_DEPTH 8
+static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN2P(dst   , AV_RN2P(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
-#define pixel  uint8_t
-#define pixel2 uint16_t
-#define pixel4 uint32_t
-#define dctcoef int16_t
+static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN4P(dst   , AV_RN4P(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
-#define FUNC(a)  a
-#define FUNCC(a) a ## _c
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define AV_RN2P AV_RN16
-#define AV_RN4P AV_RN32
-#define PIXEL_MAX ((1<<BIT_DEPTH)-1)
+static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN4P(dst                , AV_RN4P(src                ));
+        AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
-#define no_rnd_avg_pixel4 no_rnd_avg32
-#define    rnd_avg_pixel4    rnd_avg32
+static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN4P(dst                 , AV_RN4P(src                 ));
+        AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
+        AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
+        AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
 /* draw the edges of width 'w' of an image of size width, height */
 //FIXME check that this is ok for mpeg4 interlaced
@@ -1317,10 +1347,22 @@ H264_MC(avg_, 16)
 #undef op2_avg
 #undef op2_put
 
-#define put_h264_qpel8_mc00_c  ff_put_pixels8x8_c
-#define avg_h264_qpel8_mc00_c  ff_avg_pixels8x8_c
-#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
-#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
+#if BIT_DEPTH == 8
+#   define put_h264_qpel8_mc00_8_c  ff_put_pixels8x8_8_c
+#   define avg_h264_qpel8_mc00_8_c  ff_avg_pixels8x8_8_c
+#   define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
+#   define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
+#elif BIT_DEPTH == 9
+#   define put_h264_qpel8_mc00_9_c  ff_put_pixels8x8_9_c
+#   define avg_h264_qpel8_mc00_9_c  ff_avg_pixels8x8_9_c
+#   define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
+#   define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
+#elif BIT_DEPTH == 10
+#   define put_h264_qpel8_mc00_10_c  ff_put_pixels8x8_10_c
+#   define avg_h264_qpel8_mc00_10_c  ff_avg_pixels8x8_10_c
+#   define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
+#   define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
+#endif
 
 void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
     FUNCC(put_pixels8)(dst, src, stride, 8);
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 0fcb7dbbdb..1388dd5c09 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -783,7 +783,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
     dst->list_counts              = src->list_counts;
 
     dst->s.obmc_scratchpad = NULL;
-    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
+    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
 }
 
 /**
@@ -811,8 +811,8 @@ static av_cold void common_init(H264Context *h){
     s->height = s->avctx->height;
     s->codec_id= s->avctx->codec->id;
 
-    ff_h264dsp_init(&h->h264dsp);
-    ff_h264_pred_init(&h->hpc, s->codec_id);
+    ff_h264dsp_init(&h->h264dsp, 8);
+    ff_h264_pred_init(&h->hpc, s->codec_id, 8);
 
     h->dequant_coeff_pps= -1;
     s->unrestricted_mv=1;
@@ -895,7 +895,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     ff_h264_decode_init_vlc();
 
     h->pixel_shift = 0;
-    h->sps.bit_depth_luma = 8;
+    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
 
     h->thread_context[0] = h;
     h->outputed_poc = INT_MIN;
@@ -2998,6 +2998,20 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
 
             if(avctx->has_b_frames < 2)
                 avctx->has_b_frames= !s->low_delay;
+
+            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
+                if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
+                    avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
+                    h->pixel_shift = h->sps.bit_depth_luma > 8;
+
+                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
+                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
+                    dsputil_init(&s->dsp, s->avctx);
+                } else {
+                    av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
+                    return -1;
+                }
+            }
             break;
         case NAL_PPS:
             init_get_bits(&s->gb, ptr, bit_length);
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 22b2086243..64f4856189 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -29,57 +29,83 @@
 #include "avcodec.h"
 #include "h264dsp.h"
 
+#define BIT_DEPTH 8
 #include "h264dsp_template.c"
+#undef BIT_DEPTH
 
-void ff_h264dsp_init(H264DSPContext *c)
-{
-    c->h264_idct_add= ff_h264_idct_add_c;
-    c->h264_idct8_add= ff_h264_idct8_add_c;
-    c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
-    c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
-    c->h264_idct_add16     = ff_h264_idct_add16_c;
-    c->h264_idct8_add4     = ff_h264_idct8_add4_c;
-    c->h264_idct_add8      = ff_h264_idct_add8_c;
-    c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
-    c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c;
-    c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c;
+#define BIT_DEPTH 9
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
 
-    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
-    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
-    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
-    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
-    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
-    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
-    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
-    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
-    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
-    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
-    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
-    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
-    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
-    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
-    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
-    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
-    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
-    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
-    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
-    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
+{
+#undef FUNC
+#define FUNC(a, depth) a ## _ ## depth ## _c
 
-    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
-    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
-    c->h264_h_loop_filter_luma_mbaff= h264_h_loop_filter_luma_mbaff_c;
-    c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
-    c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
-    c->h264_h_loop_filter_luma_mbaff_intra= h264_h_loop_filter_luma_mbaff_intra_c;
-    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
-    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
-    c->h264_h_loop_filter_chroma_mbaff= h264_h_loop_filter_chroma_mbaff_c;
-    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
-    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
-    c->h264_h_loop_filter_chroma_mbaff_intra= h264_h_loop_filter_chroma_mbaff_intra_c;
+#define H264_DSP(depth) \
+    c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
+    c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
+    c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
+    c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
+    c->h264_idct_add16     = FUNC(ff_h264_idct_add16, depth);\
+    c->h264_idct8_add4     = FUNC(ff_h264_idct8_add4, depth);\
+    c->h264_idct_add8      = FUNC(ff_h264_idct_add8, depth);\
+    c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
+    c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
+    c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
+\
+    c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
+    c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
+    c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
+    c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
+    c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
+    c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
+    c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
+    c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
+    c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
+    c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
+    c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
+    c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
+    c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
+    c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
+    c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
+    c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
+    c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
+    c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
+    c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
+    c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
+\
+    c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
+    c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
+    c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\
+    c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\
+    c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
+    c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
+    c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
+    c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
+    c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
+    c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
+    c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
+    c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
     c->h264_loop_filter_strength= NULL;
 
-    if (ARCH_ARM) ff_h264dsp_init_arm(c);
-    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c);
-    if (HAVE_MMX) ff_h264dsp_init_x86(c);
+    switch (bit_depth) {
+    case 9:
+        H264_DSP(9);
+        break;
+    case 10:
+        H264_DSP(10);
+        break;
+    default:
+        H264_DSP(8);
+        break;
+    }
+
+    if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth);
+    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth);
+    if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth);
 }
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 7eb50be342..87a1dd9722 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -75,9 +75,9 @@ typedef struct H264DSPContext{
     void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
 }H264DSPContext;
 
-void ff_h264dsp_init(H264DSPContext *c);
-void ff_h264dsp_init_arm(H264DSPContext *c);
-void ff_h264dsp_init_ppc(H264DSPContext *c);
-void ff_h264dsp_init_x86(H264DSPContext *c);
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth);
 
 #endif /* AVCODEC_H264DSP_H */
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index 7190f4d9ca..91162ea900 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -25,10 +25,7 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#define BIT_DEPTH 8
-#define pixel uint8_t
-#define av_clip_pixel av_clip_uint8
-#define FUNCC(a) a ## _c
+#include "high_bit_depth.h"
 
 #define op_scale1(x)  block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
 #define op_scale2(x)  dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index d79777571a..1634a00835 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -25,4 +25,14 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
+#define BIT_DEPTH 8
 #include "h264idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "h264idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264idct_template.c"
+#undef BIT_DEPTH
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index ed1f18c9b6..39c9a1c9eb 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -25,7 +25,7 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#include "dsputil.h"
+#include "high_bit_depth.h"
 
 #ifndef AVCODEC_H264IDCT_INTERNAL_H
 #define AVCODEC_H264IDCT_INTERNAL_H
@@ -42,12 +42,6 @@ static const uint8_t scan8[16 + 2*4]={
 };
 #endif
 
-#define pixel  uint8_t
-#define dctcoef DCTELEM
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define FUNCC(a) a ## _c
-
 static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
     int i;
     INIT_CLIP
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 616a7a93b1..b3701ef3b8 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -26,7 +26,18 @@
  */
 
 #include "h264pred.h"
+
+#define BIT_DEPTH 8
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
 #include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264pred_template.c"
+#undef BIT_DEPTH
 
 static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
     const int lt= src[-1-1*stride];
@@ -245,11 +256,11 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
 }
 
 static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
-    pred16x16_plane_compat_c(src, stride, 1, 0);
+    pred16x16_plane_compat_8_c(src, stride, 1, 0);
 }
 
 static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
-    pred16x16_plane_compat_c(src, stride, 0, 1);
+    pred16x16_plane_compat_8_c(src, stride, 0, 1);
 }
 
 static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
@@ -352,130 +363,149 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
 /**
  * Set the intra prediction function pointers.
  */
-void ff_h264_pred_init(H264PredContext *h, int codec_id){
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
 //    MpegEncContext * const s = &h->s;
 
-    if(codec_id != CODEC_ID_RV40){
-        if(codec_id == CODEC_ID_VP8) {
-            h->pred4x4[VERT_PRED       ]= pred4x4_vertical_vp8_c;
-            h->pred4x4[HOR_PRED        ]= pred4x4_horizontal_vp8_c;
-        } else {
-            h->pred4x4[VERT_PRED       ]= pred4x4_vertical_c;
-            h->pred4x4[HOR_PRED        ]= pred4x4_horizontal_c;
-        }
-        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
-        if(codec_id == CODEC_ID_SVQ3)
-            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c;
-        else
-            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
-        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
-        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
-        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred4x4[VERT_LEFT_PRED  ]= pred4x4_vertical_left_vp8_c;
-        } else
-            h->pred4x4[VERT_LEFT_PRED  ]= pred4x4_vertical_left_c;
-        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
-        if(codec_id != CODEC_ID_VP8) {
-            h->pred4x4[LEFT_DC_PRED    ]= pred4x4_left_dc_c;
-            h->pred4x4[TOP_DC_PRED     ]= pred4x4_top_dc_c;
-            h->pred4x4[DC_128_PRED     ]= pred4x4_128_dc_c;
-        } else {
-            h->pred4x4[TM_VP8_PRED     ]= pred4x4_tm_vp8_c;
-            h->pred4x4[DC_127_PRED     ]= pred4x4_127_dc_c;
-            h->pred4x4[DC_129_PRED     ]= pred4x4_129_dc_c;
-            h->pred4x4[VERT_VP8_PRED   ]= pred4x4_vertical_c;
-            h->pred4x4[HOR_VP8_PRED    ]= pred4x4_horizontal_c;
-        }
-    }else{
-        h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
-        h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
-        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
-        h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c;
-        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
-        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
-        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
-        h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_rv40_c;
-        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_rv40_c;
-        h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
-        h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
-        h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
-        h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c;
-        h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c;
-        h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c;
+#undef FUNC
+#undef FUNCC
+#define FUNC(a, depth) a ## _ ## depth
+#define FUNCC(a, depth) a ## _ ## depth ## _c
+#define FUNCD(a) a ## _c
+
+#define H264_PRED(depth) \
+    if(codec_id != CODEC_ID_RV40){\
+        if(codec_id == CODEC_ID_VP8) {\
+            h->pred4x4[VERT_PRED       ]= FUNCD(pred4x4_vertical_vp8);\
+            h->pred4x4[HOR_PRED        ]= FUNCD(pred4x4_horizontal_vp8);\
+        } else {\
+            h->pred4x4[VERT_PRED       ]= FUNCC(pred4x4_vertical          , depth);\
+            h->pred4x4[HOR_PRED        ]= FUNCC(pred4x4_horizontal        , depth);\
+        }\
+        h->pred4x4[DC_PRED             ]= FUNCC(pred4x4_dc                , depth);\
+        if(codec_id == CODEC_ID_SVQ3)\
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
+        else\
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left     , depth);\
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right        , depth);\
+        h->pred4x4[VERT_RIGHT_PRED     ]= FUNCC(pred4x4_vertical_right    , depth);\
+        h->pred4x4[HOR_DOWN_PRED       ]= FUNCC(pred4x4_horizontal_down   , depth);\
+        if (codec_id == CODEC_ID_VP8) {\
+            h->pred4x4[VERT_LEFT_PRED  ]= FUNCD(pred4x4_vertical_left_vp8);\
+        } else\
+            h->pred4x4[VERT_LEFT_PRED  ]= FUNCC(pred4x4_vertical_left     , depth);\
+        h->pred4x4[HOR_UP_PRED         ]= FUNCC(pred4x4_horizontal_up     , depth);\
+        if(codec_id != CODEC_ID_VP8) {\
+            h->pred4x4[LEFT_DC_PRED    ]= FUNCC(pred4x4_left_dc           , depth);\
+            h->pred4x4[TOP_DC_PRED     ]= FUNCC(pred4x4_top_dc            , depth);\
+            h->pred4x4[DC_128_PRED     ]= FUNCC(pred4x4_128_dc            , depth);\
+        } else {\
+            h->pred4x4[TM_VP8_PRED     ]= FUNCD(pred4x4_tm_vp8);\
+            h->pred4x4[DC_127_PRED     ]= FUNCC(pred4x4_127_dc            , depth);\
+            h->pred4x4[DC_129_PRED     ]= FUNCC(pred4x4_129_dc            , depth);\
+            h->pred4x4[VERT_VP8_PRED   ]= FUNCC(pred4x4_vertical          , depth);\
+            h->pred4x4[HOR_VP8_PRED    ]= FUNCC(pred4x4_horizontal        , depth);\
+        }\
+    }else{\
+        h->pred4x4[VERT_PRED           ]= FUNCC(pred4x4_vertical          , depth);\
+        h->pred4x4[HOR_PRED            ]= FUNCC(pred4x4_horizontal        , depth);\
+        h->pred4x4[DC_PRED             ]= FUNCC(pred4x4_dc                , depth);\
+        h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right        , depth);\
+        h->pred4x4[VERT_RIGHT_PRED     ]= FUNCC(pred4x4_vertical_right    , depth);\
+        h->pred4x4[HOR_DOWN_PRED       ]= FUNCC(pred4x4_horizontal_down   , depth);\
+        h->pred4x4[VERT_LEFT_PRED      ]= FUNCD(pred4x4_vertical_left_rv40);\
+        h->pred4x4[HOR_UP_PRED         ]= FUNCD(pred4x4_horizontal_up_rv40);\
+        h->pred4x4[LEFT_DC_PRED        ]= FUNCC(pred4x4_left_dc           , depth);\
+        h->pred4x4[TOP_DC_PRED         ]= FUNCC(pred4x4_top_dc            , depth);\
+        h->pred4x4[DC_128_PRED         ]= FUNCC(pred4x4_128_dc            , depth);\
+        h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\
+        h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\
+        h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\
+    }\
+\
+    h->pred8x8l[VERT_PRED           ]= FUNCC(pred8x8l_vertical            , depth);\
+    h->pred8x8l[HOR_PRED            ]= FUNCC(pred8x8l_horizontal          , depth);\
+    h->pred8x8l[DC_PRED             ]= FUNCC(pred8x8l_dc                  , depth);\
+    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left           , depth);\
+    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right          , depth);\
+    h->pred8x8l[VERT_RIGHT_PRED     ]= FUNCC(pred8x8l_vertical_right      , depth);\
+    h->pred8x8l[HOR_DOWN_PRED       ]= FUNCC(pred8x8l_horizontal_down     , depth);\
+    h->pred8x8l[VERT_LEFT_PRED      ]= FUNCC(pred8x8l_vertical_left       , depth);\
+    h->pred8x8l[HOR_UP_PRED         ]= FUNCC(pred8x8l_horizontal_up       , depth);\
+    h->pred8x8l[LEFT_DC_PRED        ]= FUNCC(pred8x8l_left_dc             , depth);\
+    h->pred8x8l[TOP_DC_PRED         ]= FUNCC(pred8x8l_top_dc              , depth);\
+    h->pred8x8l[DC_128_PRED         ]= FUNCC(pred8x8l_128_dc              , depth);\
+\
+    h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical                   , depth);\
+    h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal                 , depth);\
+    if (codec_id != CODEC_ID_VP8) {\
+        h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                    , depth);\
+    } else\
+        h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
+    if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
+        h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
+        h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
+        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
+        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
+        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
+        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
+        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
+    }else{\
+        h->pred8x8[DC_PRED8x8     ]= FUNCD(pred8x8_dc_rv40);\
+        h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
+        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
+        if (codec_id == CODEC_ID_VP8) {\
+            h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc              , depth);\
+            h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc              , depth);\
+        }\
+    }\
+    h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                     , depth);\
+\
+    h->pred16x16[DC_PRED8x8     ]= FUNCC(pred16x16_dc                     , depth);\
+    h->pred16x16[VERT_PRED8x8   ]= FUNCC(pred16x16_vertical               , depth);\
+    h->pred16x16[HOR_PRED8x8    ]= FUNCC(pred16x16_horizontal             , depth);\
+    switch(codec_id){\
+    case CODEC_ID_SVQ3:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_svq3);\
+       break;\
+    case CODEC_ID_RV40:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_rv40);\
+       break;\
+    case CODEC_ID_VP8:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_tm_vp8);\
+       h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc               , depth);\
+       h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc               , depth);\
+       break;\
+    default:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCC(pred16x16_plane               , depth);\
+       break;\
+    }\
+    h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc                , depth);\
+    h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc                 , depth);\
+    h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc                 , depth);\
+\
+    /* special lossless h/v prediction for h264 */ \
+    h->pred4x4_add  [VERT_PRED   ]= FUNCC(pred4x4_vertical_add            , depth);\
+    h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add          , depth);\
+    h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add           , depth);\
+    h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add         , depth);\
+    h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add            , depth);\
+    h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add          , depth);\
+    h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add          , depth);\
+    h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add        , depth);\
+
+    switch (bit_depth) {
+        case 9:
+            H264_PRED(9)
+            break;
+        case 10:
+            H264_PRED(10)
+            break;
+        default:
+            H264_PRED(8)
+            break;
     }
 
-    h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
-    h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
-    h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
-    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
-    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
-    h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
-    h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
-    h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
-    h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
-    h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
-    h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
-    h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
-
-    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
-    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
-    if (codec_id != CODEC_ID_VP8) {
-        h->pred8x8[PLANE_PRED8x8]= pred8x8_plane_c;
-    } else
-        h->pred8x8[PLANE_PRED8x8]= pred8x8_tm_vp8_c;
-    if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){
-        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
-        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
-        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
-        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t;
-        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt;
-        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00;
-        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0;
-    }else{
-        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_rv40_c;
-        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c;
-        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c;
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred8x8[DC_127_PRED8x8]= pred8x8_127_dc_c;
-            h->pred8x8[DC_129_PRED8x8]= pred8x8_129_dc_c;
-        }
-    }
-    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
-
-    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
-    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
-    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
-    switch(codec_id){
-    case CODEC_ID_SVQ3:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_svq3_c;
-       break;
-    case CODEC_ID_RV40:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_rv40_c;
-       break;
-    case CODEC_ID_VP8:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_tm_vp8_c;
-       h->pred16x16[DC_127_PRED8x8]= pred16x16_127_dc_c;
-       h->pred16x16[DC_129_PRED8x8]= pred16x16_129_dc_c;
-       break;
-    default:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
-       break;
-    }
-    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
-    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
-    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
-
-    //special lossless h/v prediction for h264
-    h->pred4x4_add  [VERT_PRED   ]= pred4x4_vertical_add_c;
-    h->pred4x4_add  [ HOR_PRED   ]= pred4x4_horizontal_add_c;
-    h->pred8x8l_add [VERT_PRED   ]= pred8x8l_vertical_add_c;
-    h->pred8x8l_add [ HOR_PRED   ]= pred8x8l_horizontal_add_c;
-    h->pred8x8_add  [VERT_PRED8x8]= pred8x8_vertical_add_c;
-    h->pred8x8_add  [ HOR_PRED8x8]= pred8x8_horizontal_add_c;
-    h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c;
-    h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c;
-
-    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id);
-    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id);
+    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
+    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);
 }
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index db3f5809f8..34b1e90bbc 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -101,8 +101,8 @@ typedef struct H264PredContext{
     void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
 }H264PredContext;
 
-void ff_h264_pred_init(H264PredContext *h, int codec_id);
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id);
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id);
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth);
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth);
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth);
 
 #endif /* AVCODEC_H264PRED_H */
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 4bd26e2892..066e837cdf 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -26,21 +26,7 @@
  */
 
 #include "mathops.h"
-#include "dsputil.h"
-
-#define BIT_DEPTH 8
-
-#define pixel uint8_t
-#define pixel4 uint32_t
-#define dctcoef DCTELEM
-
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define FUNC(a) a
-#define FUNCC(a) a ## _c
-#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
-#define AV_WN4P  AV_WN32
-#define AV_WN4PA AV_WN32A
+#include "high_bit_depth.h"
 
 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
diff --git a/libavcodec/high_bit_depth.h b/libavcodec/high_bit_depth.h
new file mode 100644
index 0000000000..6f2b6a74f4
--- /dev/null
+++ b/libavcodec/high_bit_depth.h
@@ -0,0 +1,85 @@
+#include "dsputil.h"
+
+#ifndef BIT_DEPTH
+#define BIT_DEPTH 8
+#endif
+
+#ifdef AVCODEC_H264_HIGH_DEPTH_H
+#   undef pixel
+#   undef pixel2
+#   undef pixel4
+#   undef dctcoef
+#   undef INIT_CLIP
+#   undef no_rnd_avg_pixel4
+#   undef rnd_avg_pixel4
+#   undef AV_RN2P
+#   undef AV_RN4P
+#   undef AV_WN2P
+#   undef AV_WN4P
+#   undef AV_WN4PA
+#   undef CLIP
+#   undef FUNC
+#   undef FUNCC
+#   undef av_clip_pixel
+#   undef PIXEL_SPLAT_X4
+#else
+#   define AVCODEC_H264_HIGH_DEPTH_H
+#   define CLIP_PIXEL(depth)\
+    static inline uint16_t av_clip_pixel_ ## depth (int p)\
+    {\
+        const int pixel_max = (1 << depth)-1;\
+        return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\
+    }
+
+CLIP_PIXEL( 9)
+CLIP_PIXEL(10)
+#endif
+
+#if BIT_DEPTH > 8
+#   define pixel  uint16_t
+#   define pixel2 uint32_t
+#   define pixel4 uint64_t
+#   define dctcoef int32_t
+
+#   define INIT_CLIP
+#   define no_rnd_avg_pixel4 no_rnd_avg64
+#   define    rnd_avg_pixel4    rnd_avg64
+#   define AV_RN2P  AV_RN32
+#   define AV_RN4P  AV_RN64
+#   define AV_WN2P  AV_WN32
+#   define AV_WN4P  AV_WN64
+#   define AV_WN4PA AV_WN64A
+#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
+#else
+#   define pixel  uint8_t
+#   define pixel2 uint16_t
+#   define pixel4 uint32_t
+#   define dctcoef int16_t
+
+#   define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+#   define no_rnd_avg_pixel4 no_rnd_avg32
+#   define    rnd_avg_pixel4    rnd_avg32
+#   define AV_RN2P  AV_RN16
+#   define AV_RN4P  AV_RN32
+#   define AV_WN2P  AV_WN16
+#   define AV_WN4P  AV_WN32
+#   define AV_WN4PA AV_WN32A
+#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
+#endif
+
+#if BIT_DEPTH == 8
+#   define av_clip_pixel(a) av_clip_uint8(a)
+#   define CLIP(a) cm[a]
+#   define FUNC(a)  a ## _8
+#   define FUNCC(a) a ## _8_c
+#elif BIT_DEPTH == 9
+#   define av_clip_pixel(a) av_clip_pixel_9(a)
+#   define CLIP(a)          av_clip_pixel_9(a)
+#   define FUNC(a)  a ## _9
+#   define FUNCC(a) a ## _9_c
+#elif BIT_DEPTH == 10
+#   define av_clip_pixel(a) av_clip_pixel_10(a)
+#   define CLIP(a)          av_clip_pixel_10(a)
+#   define FUNC(a)  a ## _10
+#   define FUNCC(a) a ## _10_c
+#endif
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
index 9e49c91ddf..c0f2c036c6 100644
--- a/libavcodec/mlib/dsputil_mlib.c
+++ b/libavcodec/mlib/dsputil_mlib.c
@@ -421,10 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data)
 
 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     c->get_pixels  = get_pixels_mlib;
     c->diff_pixels = diff_pixels_mlib;
     c->add_pixels_clamped = add_pixels_clamped_mlib;
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = put_pixels16_mlib;
     c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
     c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
@@ -445,6 +448,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
 
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
     c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
+    }
 
     c->bswap_buf = bswap_buf_mlib;
 }
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 9111d036a5..adce61b277 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1384,6 +1384,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
 
 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     c->pix_abs[0][1] = sad16_x2_altivec;
     c->pix_abs[0][2] = sad16_y2_altivec;
     c->pix_abs[0][3] = sad16_xy2_altivec;
@@ -1397,8 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
     c->pix_sum = pix_sum_altivec;
     c->diff_pixels = diff_pixels_altivec;
     c->get_pixels = get_pixels_altivec;
+    if (!high_bit_depth)
     c->clear_block = clear_block_altivec;
     c->add_bytes= add_bytes_altivec;
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = put_pixels16_altivec;
     /* the two functions do the same thing, so use the same code */
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
@@ -1409,6 +1413,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
     c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
     c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
+    }
 
     c->hadamard8_diff[0] = hadamard8_diff16_altivec;
     c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index c52ea61805..5f131f3196 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -153,8 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h)
 
 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     // Common optimizations whether AltiVec is available or not
     c->prefetch = prefetch_ppc;
+    if (!high_bit_depth) {
     switch (check_dcbzl_effect()) {
         case 32:
             c->clear_blocks = clear_blocks_dcbz32_ppc;
@@ -165,6 +168,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
         default:
             break;
     }
+    }
 
 #if HAVE_ALTIVEC
     if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 02699beef4..fae0674720 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -965,8 +965,10 @@ H264_WEIGHT( 8, 8)
 H264_WEIGHT( 8, 4)
 
 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+    if (!high_bit_depth) {
         c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
         c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
 
@@ -992,11 +994,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
         dspfunc(avg_h264_qpel, 0, 16);
 #undef dspfunc
     }
+    }
 }
 
-void ff_h264dsp_init_ppc(H264DSPContext *c)
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth)
 {
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+    if (bit_depth == 8) {
         c->h264_idct_add = ff_h264_idct_add_altivec;
         c->h264_idct_add8 = ff_h264_idct_add8_altivec;
         c->h264_idct_add16 = ff_h264_idct_add16_altivec;
@@ -1019,4 +1023,5 @@ void ff_h264dsp_init_ppc(H264DSPContext *c)
         c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
         c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
     }
+    }
 }
diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c
index b6096b3b4a..f4503a9030 100644
--- a/libavcodec/ps2/dsputil_mmi.c
+++ b/libavcodec/ps2/dsputil_mmi.c
@@ -142,7 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
 {
     const int idct_algo= avctx->idct_algo;
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
+    if (!high_bit_depth) {
     c->clear_blocks = clear_blocks_mmi;
 
     c->put_pixels_tab[1][0] = put_pixels8_mmi;
@@ -150,6 +152,7 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
 
     c->put_pixels_tab[0][0] = put_pixels16_mmi;
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
+    }
 
     c->get_pixels = get_pixels_mmi;
 
diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index f3ab979976..b5d314cf0f 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1384,7 +1384,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
     if (MPV_common_init(s) < 0)
         return -1;
 
-    ff_h264_pred_init(&r->h, CODEC_ID_RV40);
+    ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
 
     r->intra_types_stride = 4*s->mb_stride + 4;
     r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 7a8d60d73e..db40ece670 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -333,6 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
 
 void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
 {
+        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
+        if (!high_bit_depth) {
         c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
         c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
         c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
@@ -368,6 +371,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
         c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
         c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
         c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
+        }
 
 #ifdef QPEL
 
@@ -401,20 +405,24 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
     dspfunc(avg_qpel, 1, 8);
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
 
+    if (!high_bit_depth) {
     dspfunc(put_h264_qpel, 0, 16);
     dspfunc(put_h264_qpel, 1, 8);
     dspfunc(put_h264_qpel, 2, 4);
     dspfunc(avg_h264_qpel, 0, 16);
     dspfunc(avg_h264_qpel, 1, 8);
     dspfunc(avg_h264_qpel, 2, 4);
+    }
 
 #undef dspfunc
+    if (!high_bit_depth) {
     c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
     c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
     c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
     c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
     c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
     c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
+    }
 
     c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index 0c724c346d..9ea48ad4a1 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -92,8 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
 {
         const int idct_algo= avctx->idct_algo;
+        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
         dsputil_init_align(c,avctx);
 
+        if (!high_bit_depth)
         c->clear_blocks = clear_blocks_sh4;
         if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
                 c->idct_put = idct_put;
diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c
index baf555b7f5..ab9258b2b9 100644
--- a/libavcodec/sparc/dsputil_vis.c
+++ b/libavcodec/sparc/dsputil_vis.c
@@ -3953,6 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
 {
   /* VIS-specific optimizations */
   int accel = vis_level ();
+  const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
   if (accel & ACCEL_SPARC_VIS) {
       if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){
@@ -3962,6 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
           c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
       }
 
+      if (!high_bit_depth) {
       c->put_pixels_tab[0][0] = MC_put_o_16_vis;
       c->put_pixels_tab[0][1] = MC_put_x_16_vis;
       c->put_pixels_tab[0][2] = MC_put_y_16_vis;
@@ -4001,5 +4003,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
       c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
       c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
       c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis;
+      }
   }
 }
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index ea0e911d0b..dc7eb2121a 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1698,7 +1698,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
     avctx->pix_fmt = PIX_FMT_YUV420P;
 
     dsputil_init(&s->dsp, avctx);
-    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
+    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
     ff_vp8dsp_init(&s->vp8dsp);
 
     return 0;
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index f98e6ae0fa..a0cb11aa40 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
     if (avctx->dsp_mask) {
         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
         c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
+        if (!high_bit_depth) {
         c->clear_block  = clear_block_mmx;
         c->clear_blocks = clear_blocks_mmx;
         if ((mm_flags & AV_CPU_FLAG_SSE) &&
@@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->clear_block  = clear_block_sse;
             c->clear_blocks = clear_blocks_sse;
         }
+        }
 
 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
         c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
@@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
         c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
 
+        if (!high_bit_depth) {
         SET_HPEL_FUNCS(put, 0, 16, mmx);
         SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
         SET_HPEL_FUNCS(avg, 0, 16, mmx);
@@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
         SET_HPEL_FUNCS(avg, 1, 8, mmx);
         SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
+        }
 
 #if ARCH_X86_32 || !HAVE_YASM
         c->gmc= gmc_mmx;
 #endif
 #if ARCH_X86_32 && HAVE_YASM
+        if (!high_bit_depth)
         c->emulated_edge_mc = emulated_edge_mc_mmx;
 #endif
 
         c->add_bytes= add_bytes_mmx;
         c->add_bytes_l2= add_bytes_l2_mmx;
 
+        if (!high_bit_depth)
         c->draw_edges = draw_edges_mmx;
 
         if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
@@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         }
 
 #if HAVE_YASM
+        if (!high_bit_depth) {
         c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
         c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
+        }
 
         c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
         c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
@@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         if (mm_flags & AV_CPU_FLAG_MMX2) {
             c->prefetch = prefetch_mmx2;
 
+            if (!high_bit_depth) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
             c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
 
@@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
             c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
             c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+            }
 
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+                if (!high_bit_depth) {
                 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
                 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
                 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+                }
 
                 if (CONFIG_VP3_DECODER && HAVE_YASM) {
                     c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
@@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
 
+            if (!high_bit_depth) {
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
             SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+            }
 
             SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
@@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
 
+            if (!high_bit_depth) {
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
             c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
             c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
+            }
 
             c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
 #endif
@@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
             c->prefetch = prefetch_3dnow;
 
+            if (!high_bit_depth) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
             c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
 
@@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
             }
+            }
 
             if (CONFIG_VP3_DECODER
                 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
@@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
 
+            if (!high_bit_depth) {
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
             SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+            }
 
             SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
@@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
 
 #if HAVE_YASM
+            if (!high_bit_depth) {
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
+            }
 
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
@@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
         if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
             // these functions are slower than mmx on AMD, but faster on Intel
+            if (!high_bit_depth) {
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
             c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
             H264_QPEL_FUNCS(0, 0, sse2);
+            }
         }
         if(mm_flags & AV_CPU_FLAG_SSE2){
+            if (!high_bit_depth) {
             H264_QPEL_FUNCS(0, 1, sse2);
             H264_QPEL_FUNCS(0, 2, sse2);
             H264_QPEL_FUNCS(0, 3, sse2);
@@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 1, sse2);
             H264_QPEL_FUNCS(3, 2, sse2);
             H264_QPEL_FUNCS(3, 3, sse2);
+            }
         }
 #if HAVE_SSSE3
         if(mm_flags & AV_CPU_FLAG_SSSE3){
+            if (!high_bit_depth) {
             H264_QPEL_FUNCS(1, 0, ssse3);
             H264_QPEL_FUNCS(1, 1, ssse3);
             H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 1, ssse3);
             H264_QPEL_FUNCS(3, 2, ssse3);
             H264_QPEL_FUNCS(3, 3, ssse3);
+            }
             c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
 #if HAVE_YASM
+            if (!high_bit_depth) {
             c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
             c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
+            }
             c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
             if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
                 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
@@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 }
             }
 
+            if (!high_bit_depth)
             c->emulated_edge_mc = emulated_edge_mc_sse;
             c->gmc= gmc_sse;
 #endif
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 4142cc1edd..9eb752581b 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -95,9 +95,13 @@ void ff_pred4x4_tm_vp8_mmxext      (uint8_t *src, const uint8_t *topright, int s
 void ff_pred4x4_tm_vp8_ssse3       (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
 
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
 {
     int mm_flags = av_get_cpu_flags();
+    const int high_depth = bit_depth > 8;
+
+    if (high_depth)
+        return;
 
 #if HAVE_YASM
     if (mm_flags & AV_CPU_FLAG_MMX) {
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 9f004a5314..b4936a618e 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -285,10 +285,11 @@ H264_BIWEIGHT_MMX    ( 4,  8)
 H264_BIWEIGHT_MMX    ( 4,  4)
 H264_BIWEIGHT_MMX    ( 4,  2)
 
-void ff_h264dsp_init_x86(H264DSPContext *c)
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
 {
     int mm_flags = av_get_cpu_flags();
 
+    if (bit_depth == 8) {
     if (mm_flags & AV_CPU_FLAG_MMX2) {
         c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
     }
@@ -378,5 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c)
             }
         }
     }
+    }
 #endif
 }