From 188dea1dbfd8761133ca138bba0d8f19beac6c09 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 2 Apr 2011 12:28:01 +0200
Subject: lavc: move some flac-specific options to its private context.

---
 libavcodec/alacenc.c  |   4 +-
 libavcodec/avcodec.h  |  35 ++++++++++++-----
 libavcodec/flacenc.c  | 102 +++++++++++++++++++++++++++++++++++---------------
 libavcodec/lpc.c      |  12 +++---
 libavcodec/lpc.h      |  17 +++++++--
 libavcodec/options.c  |  16 +++++---
 libavcodec/ra144enc.c |   4 +-
 libavcodec/version.h  |   5 ++-
 8 files changed, 135 insertions(+), 60 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c
index 2e20a602dc..acaa545915 100644
--- a/libavcodec/alacenc.c
+++ b/libavcodec/alacenc.c
@@ -146,7 +146,7 @@ static void calc_predictor_params(AlacEncodeContext *s, int ch)
                                       s->min_prediction_order,
                                       s->max_prediction_order,
                                       ALAC_MAX_LPC_PRECISION, coefs, shift,
-                                      AV_LPC_TYPE_LEVINSON, 0,
+                                      FF_LPC_TYPE_LEVINSON, 0,
                                       ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);
 
         s->lpc[ch].lpc_order = opt_order;
@@ -457,7 +457,7 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
 
     s->avctx = avctx;
     ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, s->max_prediction_order,
-                      AV_LPC_TYPE_LEVINSON);
+                      FF_LPC_TYPE_LEVINSON);
 
     return ret;
 }
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index ec11ce09fe..876ba8c21b 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -514,10 +514,11 @@ enum AVChromaLocation{
     AVCHROMA_LOC_NB           , ///< Not part of ABI
 };
 
+#if FF_API_FLAC_GLOBAL_OPTS
 /**
  * LPC analysis type
  */
-enum AVLPCType {
+attribute_deprecated enum AVLPCType {
     AV_LPC_TYPE_DEFAULT     = -1, ///< use the codec default LPC type
     AV_LPC_TYPE_NONE        =  0, ///< do not use LPC prediction or use all zero coefficients
     AV_LPC_TYPE_FIXED       =  1, ///< fixed LPC coefficients
@@ -525,6 +526,7 @@ enum AVLPCType {
     AV_LPC_TYPE_CHOLESKY    =  3, ///< Cholesky factorization
     AV_LPC_TYPE_NB              , ///< Not part of ABI
 };
+#endif
 
 enum AVAudioServiceType {
     AV_AUDIO_SERVICE_TYPE_MAIN              = 0,
@@ -2472,42 +2474,53 @@ typedef struct AVCodecContext {
 #define FF_COMPRESSION_DEFAULT -1
 
     /**
-     * LPC coefficient precision - used by FLAC encoder
      * - encoding: Set by user.
      * - decoding: unused
      */
-    int lpc_coeff_precision;
+    int min_prediction_order;
 
     /**
      * - encoding: Set by user.
      * - decoding: unused
      */
-    int min_prediction_order;
+    int max_prediction_order;
 
+#if FF_API_FLAC_GLOBAL_OPTS
     /**
+     * @defgroup flac_opts FLAC options
+     * @deprecated Use FLAC encoder private options instead.
+     * @{
+     */
+
+    /**
+     * LPC coefficient precision - used by FLAC encoder
      * - encoding: Set by user.
      * - decoding: unused
      */
-    int max_prediction_order;
+    attribute_deprecated int lpc_coeff_precision;
 
     /**
      * search method for selecting prediction order
      * - encoding: Set by user.
      * - decoding: unused
      */
-    int prediction_order_method;
+    attribute_deprecated int prediction_order_method;
 
     /**
      * - encoding: Set by user.
      * - decoding: unused
      */
-    int min_partition_order;
+    attribute_deprecated int min_partition_order;
 
     /**
      * - encoding: Set by user.
      * - decoding: unused
      */
-    int max_partition_order;
+    attribute_deprecated int max_partition_order;
+    /**
+     * @}
+     */
+#endif
 
     /**
      * GOP timecode frame start number, in non drop frame format
@@ -2725,19 +2738,21 @@ typedef struct AVCodecContext {
 
     int log_level_offset;
 
+#if FF_API_FLAC_GLOBAL_OPTS
     /**
      * Determines which LPC analysis algorithm to use.
      * - encoding: Set by user
      * - decoding: unused
      */
-    enum AVLPCType lpc_type;
+    attribute_deprecated enum AVLPCType lpc_type;
 
     /**
      * Number of passes to use for Cholesky factorization during LPC analysis
      * - encoding: Set by user
      * - decoding: unused
      */
-    int lpc_passes;
+    attribute_deprecated int lpc_passes;
+#endif
 
     /**
      * Number of slices.
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index f13d5801e2..7685ff6ea0 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/crc.h"
 #include "libavutil/md5.h"
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "golomb.h"
@@ -43,7 +44,7 @@
 typedef struct CompressionOptions {
     int compression_level;
     int block_time_ms;
-    enum AVLPCType lpc_type;
+    enum FFLPCType lpc_type;
     int lpc_passes;
     int lpc_coeff_precision;
     int min_prediction_order;
@@ -80,6 +81,7 @@ typedef struct FlacFrame {
 } FlacFrame;
 
 typedef struct FlacEncodeContext {
+    AVClass *class;
     PutBitContext pb;
     int channels;
     int samplerate;
@@ -156,16 +158,16 @@ static av_cold void dprint_compression_options(FlacEncodeContext *s)
     av_log(avctx, AV_LOG_DEBUG, " compression: %d\n", opt->compression_level);
 
     switch (opt->lpc_type) {
-    case AV_LPC_TYPE_NONE:
+    case FF_LPC_TYPE_NONE:
         av_log(avctx, AV_LOG_DEBUG, " lpc type: None\n");
         break;
-    case AV_LPC_TYPE_FIXED:
+    case FF_LPC_TYPE_FIXED:
         av_log(avctx, AV_LOG_DEBUG, " lpc type: Fixed pre-defined coefficients\n");
         break;
-    case AV_LPC_TYPE_LEVINSON:
+    case FF_LPC_TYPE_LEVINSON:
         av_log(avctx, AV_LOG_DEBUG, " lpc type: Levinson-Durbin recursion with Welch window\n");
         break;
-    case AV_LPC_TYPE_CHOLESKY:
+    case FF_LPC_TYPE_CHOLESKY:
         av_log(avctx, AV_LOG_DEBUG, " lpc type: Cholesky factorization, %d pass%s\n",
                opt->lpc_passes, opt->lpc_passes == 1 ? "" : "es");
         break;
@@ -266,32 +268,42 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
 
     s->options.block_time_ms = ((int[]){ 27, 27, 27,105,105,105,105,105,105,105,105,105,105})[level];
 
-    s->options.lpc_type      = ((int[]){ AV_LPC_TYPE_FIXED,    AV_LPC_TYPE_FIXED,    AV_LPC_TYPE_FIXED,
-                                         AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON,
-                                         AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON,
-                                         AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON,
-                                         AV_LPC_TYPE_LEVINSON})[level];
+    if (s->options.lpc_type == FF_LPC_TYPE_DEFAULT)
+        s->options.lpc_type  = ((int[]){ FF_LPC_TYPE_FIXED,    FF_LPC_TYPE_FIXED,    FF_LPC_TYPE_FIXED,
+                                         FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
+                                         FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
+                                         FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
+                                         FF_LPC_TYPE_LEVINSON})[level];
 
     s->options.min_prediction_order = ((int[]){  2,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1})[level];
     s->options.max_prediction_order = ((int[]){  3,  4,  4,  6,  8,  8,  8,  8, 12, 12, 12, 32, 32})[level];
 
-    s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST,    ORDER_METHOD_EST,    ORDER_METHOD_EST,
-                                                   ORDER_METHOD_EST,    ORDER_METHOD_EST,    ORDER_METHOD_EST,
-                                                   ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG,    ORDER_METHOD_4LEVEL,
-                                                   ORDER_METHOD_LOG,    ORDER_METHOD_SEARCH, ORDER_METHOD_LOG,
-                                                   ORDER_METHOD_SEARCH})[level];
+    if (s->options.prediction_order_method < 0)
+        s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST,    ORDER_METHOD_EST,    ORDER_METHOD_EST,
+                                                       ORDER_METHOD_EST,    ORDER_METHOD_EST,    ORDER_METHOD_EST,
+                                                       ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG,    ORDER_METHOD_4LEVEL,
+                                                       ORDER_METHOD_LOG,    ORDER_METHOD_SEARCH, ORDER_METHOD_LOG,
+                                                       ORDER_METHOD_SEARCH})[level];
 
-    s->options.min_partition_order = ((int[]){  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0})[level];
-    s->options.max_partition_order = ((int[]){  2,  2,  3,  3,  3,  8,  8,  8,  8,  8,  8,  8,  8})[level];
+    if (s->options.min_partition_order > s->options.max_partition_order) {
+        av_log(avctx, AV_LOG_ERROR, "invalid partition orders: min=%d max=%d\n",
+               s->options.min_partition_order, s->options.max_partition_order);
+        return AVERROR(EINVAL);
+    }
+    if (s->options.min_partition_order < 0)
+        s->options.min_partition_order = ((int[]){  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0})[level];
+    if (s->options.max_partition_order < 0)
+        s->options.max_partition_order = ((int[]){  2,  2,  3,  3,  3,  8,  8,  8,  8,  8,  8,  8,  8})[level];
 
     /* set compression option overrides from AVCodecContext */
-    if (avctx->lpc_type > AV_LPC_TYPE_DEFAULT) {
-        if (avctx->lpc_type > AV_LPC_TYPE_CHOLESKY) {
+#if FF_API_FLAC_GLOBAL_OPTS
+    if (avctx->lpc_type > FF_LPC_TYPE_DEFAULT) {
+        if (avctx->lpc_type > FF_LPC_TYPE_CHOLESKY) {
             av_log(avctx, AV_LOG_ERROR, "unknown lpc type: %d\n", avctx->lpc_type);
             return -1;
         }
         s->options.lpc_type = avctx->lpc_type;
-        if (s->options.lpc_type == AV_LPC_TYPE_CHOLESKY) {
+        if (s->options.lpc_type == FF_LPC_TYPE_CHOLESKY) {
             if (avctx->lpc_passes < 0) {
                 // default number of passes for Cholesky
                 s->options.lpc_passes = 2;
@@ -304,11 +316,12 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
             }
         }
     }
+#endif
 
-    if (s->options.lpc_type == AV_LPC_TYPE_NONE) {
+    if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
         s->options.min_prediction_order = 0;
     } else if (avctx->min_prediction_order >= 0) {
-        if (s->options.lpc_type == AV_LPC_TYPE_FIXED) {
+        if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
             if (avctx->min_prediction_order > MAX_FIXED_ORDER) {
                 av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
                        avctx->min_prediction_order);
@@ -322,10 +335,10 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
         }
         s->options.min_prediction_order = avctx->min_prediction_order;
     }
-    if (s->options.lpc_type == AV_LPC_TYPE_NONE) {
+    if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
         s->options.max_prediction_order = 0;
     } else if (avctx->max_prediction_order >= 0) {
-        if (s->options.lpc_type == AV_LPC_TYPE_FIXED) {
+        if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
             if (avctx->max_prediction_order > MAX_FIXED_ORDER) {
                 av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
                        avctx->max_prediction_order);
@@ -345,6 +358,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
         return -1;
     }
 
+#if FF_API_FLAC_GLOBAL_OPTS
     if (avctx->prediction_order_method >= 0) {
         if (avctx->prediction_order_method > ORDER_METHOD_LOG) {
             av_log(avctx, AV_LOG_ERROR, "invalid prediction order method: %d\n",
@@ -375,6 +389,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
                s->options.min_partition_order, s->options.max_partition_order);
         return -1;
     }
+#endif
 
     if (avctx->frame_size > 0) {
         if (avctx->frame_size < FLAC_MIN_BLOCKSIZE ||
@@ -388,6 +403,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
     }
     s->max_blocksize = s->avctx->frame_size;
 
+#if FF_API_FLAC_GLOBAL_OPTS
     /* set LPC precision */
     if (avctx->lpc_coeff_precision > 0) {
         if (avctx->lpc_coeff_precision > MAX_LPC_PRECISION) {
@@ -396,10 +412,8 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
             return -1;
         }
         s->options.lpc_coeff_precision = avctx->lpc_coeff_precision;
-    } else {
-        /* default LPC precision */
-        s->options.lpc_coeff_precision = 15;
     }
+#endif
 
     /* set maximum encoded frame size in verbatim mode */
     s->max_framesize = ff_flac_get_max_frame_size(s->avctx->frame_size,
@@ -426,7 +440,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
 
     ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
-                      s->options.max_prediction_order, AV_LPC_TYPE_LEVINSON);
+                      s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
 
     dprint_compression_options(s);
 
@@ -867,8 +881,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
 
     /* FIXED */
     sub->type = FLAC_SUBFRAME_FIXED;
-    if (s->options.lpc_type == AV_LPC_TYPE_NONE  ||
-        s->options.lpc_type == AV_LPC_TYPE_FIXED || n <= max_order) {
+    if (s->options.lpc_type == FF_LPC_TYPE_NONE  ||
+        s->options.lpc_type == FF_LPC_TYPE_FIXED || n <= max_order) {
         uint32_t bits[MAX_FIXED_ORDER+1];
         if (max_order > MAX_FIXED_ORDER)
             max_order = MAX_FIXED_ORDER;
@@ -1314,6 +1328,33 @@ static av_cold int flac_encode_close(AVCodecContext *avctx)
     return 0;
 }
 
+#define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
+static const AVOption options[] = {
+{ "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), FF_OPT_TYPE_INT, 15, 0, MAX_LPC_PRECISION, FLAGS },
+{ "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), FF_OPT_TYPE_INT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, "lpc_type" },
+{ "none",     NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_NONE,     INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "fixed",    NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_FIXED,    INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "levinson", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_LEVINSON, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "cholesky", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes),  FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, FLAGS },
+{ "min_partition_order",  NULL, offsetof(FlacEncodeContext, options.min_partition_order),  FF_OPT_TYPE_INT, -1,      -1, MAX_PARTITION_ORDER, FLAGS },
+{ "max_partition_order",  NULL, offsetof(FlacEncodeContext, options.max_partition_order),  FF_OPT_TYPE_INT, -1,      -1, MAX_PARTITION_ORDER, FLAGS },
+{ "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), FF_OPT_TYPE_INT, -1, -1, ORDER_METHOD_LOG, FLAGS, "predm" },
+{ "estimation", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_EST,    INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "2level",     NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_2LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "4level",     NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_4LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "8level",     NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_8LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "search",     NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_SEARCH, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "log",        NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_LOG,    INT_MIN, INT_MAX, FLAGS, "predm" },
+{ NULL },
+};
+
+static const AVClass flac_encoder_class = {
+    "FLAC encoder",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_flac_encoder = {
     "flac",
@@ -1327,4 +1368,5 @@ AVCodec ff_flac_encoder = {
     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
     .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"),
+    .priv_class = &flac_encoder_class,
 };
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index 58bb02de7f..ed985d36ff 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -158,7 +158,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
                       const int32_t *samples, int blocksize, int min_order,
                       int max_order, int precision,
                       int32_t coefs[][MAX_LPC_ORDER], int *shift,
-                      enum AVLPCType lpc_type, int lpc_passes,
+                      enum FFLPCType lpc_type, int lpc_passes,
                       int omethod, int max_shift, int zero_shift)
 {
     double autoc[MAX_LPC_ORDER+1];
@@ -168,7 +168,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
     int opt_order;
 
     assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER &&
-           lpc_type > AV_LPC_TYPE_FIXED);
+           lpc_type > FF_LPC_TYPE_FIXED);
 
     /* reinit LPC context if parameters have changed */
     if (blocksize != s->blocksize || max_order != s->max_order ||
@@ -177,7 +177,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
         ff_lpc_init(s, blocksize, max_order, lpc_type);
     }
 
-    if (lpc_type == AV_LPC_TYPE_LEVINSON) {
+    if (lpc_type == FF_LPC_TYPE_LEVINSON) {
         double *windowed_samples = s->windowed_samples + max_order;
 
         s->lpc_apply_welch_window(samples, blocksize, windowed_samples);
@@ -188,7 +188,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
 
         for(i=0; i<max_order; i++)
             ref[i] = fabs(lpc[i][i]);
-    } else if (lpc_type == AV_LPC_TYPE_CHOLESKY) {
+    } else if (lpc_type == FF_LPC_TYPE_CHOLESKY) {
         LLSModel m[2];
         double var[MAX_LPC_ORDER+1], av_uninit(weight);
 
@@ -241,13 +241,13 @@ int ff_lpc_calc_coefs(LPCContext *s,
 }
 
 av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order,
-                        enum AVLPCType lpc_type)
+                        enum FFLPCType lpc_type)
 {
     s->blocksize = blocksize;
     s->max_order = max_order;
     s->lpc_type  = lpc_type;
 
-    if (lpc_type == AV_LPC_TYPE_LEVINSON) {
+    if (lpc_type == FF_LPC_TYPE_LEVINSON) {
         s->windowed_samples = av_mallocz((blocksize + max_order + 2) *
                                          sizeof(*s->windowed_samples));
         if (!s->windowed_samples)
diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h
index c2d342801a..8cc2362e5b 100644
--- a/libavcodec/lpc.h
+++ b/libavcodec/lpc.h
@@ -35,11 +35,22 @@
 #define MIN_LPC_ORDER        1
 #define MAX_LPC_ORDER       32
 
+/**
+ * LPC analysis type
+ */
+enum FFLPCType {
+    FF_LPC_TYPE_DEFAULT     = -1, ///< use the codec default LPC type
+    FF_LPC_TYPE_NONE        =  0, ///< do not use LPC prediction or use all zero coefficients
+    FF_LPC_TYPE_FIXED       =  1, ///< fixed LPC coefficients
+    FF_LPC_TYPE_LEVINSON    =  2, ///< Levinson-Durbin recursion
+    FF_LPC_TYPE_CHOLESKY    =  3, ///< Cholesky factorization
+    FF_LPC_TYPE_NB              , ///< Not part of ABI
+};
 
 typedef struct LPCContext {
     int blocksize;
     int max_order;
-    enum AVLPCType lpc_type;
+    enum FFLPCType lpc_type;
     double *windowed_samples;
 
     /**
@@ -77,14 +88,14 @@ int ff_lpc_calc_coefs(LPCContext *s,
                       const int32_t *samples, int blocksize, int min_order,
                       int max_order, int precision,
                       int32_t coefs[][MAX_LPC_ORDER], int *shift,
-                      enum AVLPCType lpc_type, int lpc_passes,
+                      enum FFLPCType lpc_type, int lpc_passes,
                       int omethod, int max_shift, int zero_shift);
 
 /**
  * Initialize LPCContext.
  */
 int ff_lpc_init(LPCContext *s, int blocksize, int max_order,
-                enum AVLPCType lpc_type);
+                enum FFLPCType lpc_type);
 void ff_lpc_init_x86(LPCContext *s);
 
 /**
diff --git a/libavcodec/options.c b/libavcodec/options.c
index 2f44185f36..03b87a3572 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -380,12 +380,14 @@ static const AVOption options[]={
 {"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"},
 {"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E},
 {"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E},
-{"lpc_coeff_precision", "LPC coefficient precision (FLAC)", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
 {"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
 {"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"prediction_order_method", "search method for selecting prediction order", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+#if FF_API_FLAC_GLOBAL_OPTS
+{"lpc_coeff_precision", "deprecated, use flac-specific options", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
+{"prediction_order_method", "deprecated, use flac-specific options", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"min_partition_order", "deprecated, use flac-specific options", OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"max_partition_order", "deprecated, use flac-specific options", OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+#endif
 {"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT64, 0, 0, INT64_MAX, V|E},
 {"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_DROP_FRAME_TIMECODE, INT_MIN, INT_MAX, V|E, "flags2"},
 {"non_linear_q", "use non linear quantizer", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_NON_LINEAR_QUANT, INT_MIN, INT_MAX, V|E, "flags2"},
@@ -416,12 +418,14 @@ static const AVOption options[]={
 {"intra_refresh", "use periodic insertion of intra blocks instead of keyframes", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_REFRESH, INT_MIN, INT_MAX, V|E, "flags2"},
 {"crf_max", "in crf mode, prevents vbv from lowering quality beyond this point", OFFSET(crf_max), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E},
 {"log_level_offset", "set the log level offset", OFFSET(log_level_offset), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX },
-{"lpc_type", "specify LPC algorithm", OFFSET(lpc_type), FF_OPT_TYPE_INT, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E},
+#if FF_API_FLAC_GLOBAL_OPTS
+{"lpc_type", "deprecated, use flac-specific options", OFFSET(lpc_type), FF_OPT_TYPE_INT, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E},
 {"none",     NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_NONE,     INT_MIN, INT_MAX, A|E, "lpc_type"},
 {"fixed",    NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_FIXED,    INT_MIN, INT_MAX, A|E, "lpc_type"},
 {"levinson", NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_LEVINSON, INT_MIN, INT_MAX, A|E, "lpc_type"},
 {"cholesky", NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, A|E, "lpc_type"},
-{"lpc_passes", "number of passes to use for Cholesky factorization during LPC analysis", OFFSET(lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"lpc_passes", "deprecated, use flac-specific options", OFFSET(lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+#endif
 {"slices", "number of slices, used in parallelized decoding", OFFSET(slices), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E},
 {"thread_type", "select multithreading type", OFFSET(thread_type), FF_OPT_TYPE_INT, FF_THREAD_SLICE|FF_THREAD_FRAME, 0, INT_MAX, V|E|D, "thread_type"},
 {"slice", NULL, 0, FF_OPT_TYPE_CONST, FF_THREAD_SLICE, INT_MIN, INT_MAX, V|E|D, "thread_type"},
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 24ba934cc1..6eab6c300f 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -54,7 +54,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
     ractx->avctx = avctx;
     ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
-                      AV_LPC_TYPE_LEVINSON);
+                      FF_LPC_TYPE_LEVINSON);
     return ret;
 }
 
@@ -461,7 +461,7 @@ static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame,
                                     32)];
 
     ff_lpc_calc_coefs(&ractx->lpc_ctx, lpc_data, NBLOCKS * BLOCKSIZE, LPC_ORDER,
-                      LPC_ORDER, 16, lpc_coefs, shift, AV_LPC_TYPE_LEVINSON,
+                      LPC_ORDER, 16, lpc_coefs, shift, FF_LPC_TYPE_LEVINSON,
                       0, ORDER_METHOD_EST, 12, 0);
     for (i = 0; i < LPC_ORDER; i++)
         block_coefs[NBLOCKS - 1][i] = -(lpc_coefs[LPC_ORDER - 1][i] <<
diff --git a/libavcodec/version.h b/libavcodec/version.h
index d840fbe81f..1b454b8bd6 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,7 +21,7 @@
 #define AVCODEC_VERSION_H
 
 #define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR  2
+#define LIBAVCODEC_VERSION_MINOR  3
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
@@ -62,5 +62,8 @@
 #ifndef FF_API_OLD_FF_PICT_TYPES
 #define FF_API_OLD_FF_PICT_TYPES (LIBAVCODEC_VERSION_MAJOR < 54)
 #endif
+#ifndef FF_API_FLAC_GLOBAL_OPTS
+#define FF_API_FLAC_GLOBAL_OPTS (LIBAVCODEC_VERSION_MAJOR < 54)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
-- 
cgit v1.2.3


From dd561441b1e849df7d8681c6f32af82d4088dafd Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 9 May 2011 09:48:21 -0400
Subject: h264: DSP'ize MBAFF loopfilter.

---
 libavcodec/h264_loopfilter.c | 144 +++++++------------------------------------
 libavcodec/h264dsp.c         |  56 +++++++++++------
 libavcodec/h264dsp.h         |   4 ++
 3 files changed, 64 insertions(+), 140 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 619ab198e3..35630658b5 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -135,138 +135,38 @@ static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t
     }
 }
 
-static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) {
-    int i;
+static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
     int index_a = qp + h->slice_alpha_c0_offset;
     int alpha = alpha_table[index_a];
     int beta  = beta_table[qp + h->slice_beta_offset];
-    for( i = 0; i < 8; i++, pix += stride) {
-        const int bS_index = (i >> 1) * bsi;
-
-        if( bS[bS_index] == 0 ) {
-            continue;
-        }
-
-        if( bS[bS_index] < 4 ) {
-            const int tc0 = tc0_table[index_a][bS[bS_index]];
-            const int p0 = pix[-1];
-            const int p1 = pix[-2];
-            const int p2 = pix[-3];
-            const int q0 = pix[0];
-            const int q1 = pix[1];
-            const int q2 = pix[2];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
-                int tc = tc0;
-                int i_delta;
-
-                if( FFABS( p2 - p0 ) < beta ) {
-                    if(tc0)
-                    pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
-                    tc++;
-                }
-                if( FFABS( q2 - q0 ) < beta ) {
-                    if(tc0)
-                    pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
-                    tc++;
-                }
-
-                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
-                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
-                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
-            }
-        }else{
-            const int p0 = pix[-1];
-            const int p1 = pix[-2];
-            const int p2 = pix[-3];
-
-            const int q0 = pix[0];
-            const int q1 = pix[1];
-            const int q2 = pix[2];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
+    if (alpha ==0 || beta == 0) return;
 
-                if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
-                    if( FFABS( p2 - p0 ) < beta)
-                    {
-                        const int p3 = pix[-4];
-                        /* p0', p1', p2' */
-                        pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
-                        pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
-                        pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
-                    } else {
-                        /* p0' */
-                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
-                    }
-                    if( FFABS( q2 - q0 ) < beta)
-                    {
-                        const int q3 = pix[3];
-                        /* q0', q1', q2' */
-                        pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
-                        pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
-                        pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
-                    } else {
-                        /* q0' */
-                        pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
-                    }
-                }else{
-                    /* p0', q0' */
-                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
-                    pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
-                }
-                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
-            }
-        }
+    if( bS[0] < 4 ) {
+        int8_t tc[4];
+        tc[0] = tc0_table[index_a][bS[0*bsi]];
+        tc[1] = tc0_table[index_a][bS[1*bsi]];
+        tc[2] = tc0_table[index_a][bS[2*bsi]];
+        tc[3] = tc0_table[index_a][bS[3*bsi]];
+        h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
+    } else {
+        h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
     }
 }
-static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) {
-    int i;
+static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
     int index_a = qp + h->slice_alpha_c0_offset;
     int alpha = alpha_table[index_a];
     int beta  = beta_table[qp + h->slice_beta_offset];
-    for( i = 0; i < 4; i++, pix += stride) {
-        const int bS_index = i*bsi;
-
-        if( bS[bS_index] == 0 ) {
-            continue;
-        }
+    if (alpha ==0 || beta == 0) return;
 
-        if( bS[bS_index] < 4 ) {
-            const int tc = tc0_table[index_a][bS[bS_index]] + 1;
-            const int p0 = pix[-1];
-            const int p1 = pix[-2];
-            const int q0 = pix[0];
-            const int q1 = pix[1];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
-                const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-
-                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
-                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
-                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
-            }
-        }else{
-            const int p0 = pix[-1];
-            const int p1 = pix[-2];
-            const int q0 = pix[0];
-            const int q1 = pix[1];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
-
-                pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
-                pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
-                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
-            }
-        }
+    if( bS[0] < 4 ) {
+        int8_t tc[4];
+        tc[0] = tc0_table[index_a][bS[0*bsi]] + 1;
+        tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
+        tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
+        tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
+        h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
+    } else {
+        h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
     }
 }
 
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 85dbb8f2e5..1ef6a26db4 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -99,15 +99,15 @@ H264_WEIGHT(2,2)
 #undef op_scale2
 #undef H264_WEIGHT
 
-static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
     int i, d;
     for( i = 0; i < 4; i++ ) {
         if( tc0[i] < 0 ) {
-            pix += 4*ystride;
+            pix += inner_iters*ystride;
             continue;
         }
-        for( d = 0; d < 4; d++ ) {
+        for( d = 0; d < inner_iters; d++ ) {
             const int p0 = pix[-1*xstride];
             const int p1 = pix[-2*xstride];
             const int p2 = pix[-3*xstride];
@@ -143,17 +143,21 @@ static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, in
 }
 static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
+    h264_loop_filter_luma_c(pix, stride, 1, 4, alpha, beta, tc0);
 }
 static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
+    h264_loop_filter_luma_c(pix, 1, stride, 4, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_luma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, 1, stride, 2, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
 {
     int d;
-    for( d = 0; d < 16; d++ ) {
+    for( d = 0; d < 4 * inner_iters; d++ ) {
         const int p2 = pix[-3*xstride];
         const int p1 = pix[-2*xstride];
         const int p0 = pix[-1*xstride];
@@ -200,23 +204,27 @@ static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *p
 }
 static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
+    h264_loop_filter_luma_intra_c(pix, stride, 1, 4, alpha, beta);
 }
 static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
+    h264_loop_filter_luma_intra_c(pix, 1, stride, 4, alpha, beta);
+}
+static void h264_h_loop_filter_luma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_luma_intra_c(pix, 1, stride, 2, alpha, beta);
 }
 
-static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
     int i, d;
     for( i = 0; i < 4; i++ ) {
         const int tc = tc0[i];
         if( tc <= 0 ) {
-            pix += 2*ystride;
+            pix += inner_iters*ystride;
             continue;
         }
-        for( d = 0; d < 2; d++ ) {
+        for( d = 0; d < inner_iters; d++ ) {
             const int p0 = pix[-1*xstride];
             const int p1 = pix[-2*xstride];
             const int q0 = pix[0];
@@ -237,17 +245,21 @@ static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix,
 }
 static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
+    h264_loop_filter_chroma_c(pix, stride, 1, 2, alpha, beta, tc0);
 }
 static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
+    h264_loop_filter_chroma_c(pix, 1, stride, 2, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_chroma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, 1, stride, 1, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
 {
     int d;
-    for( d = 0; d < 8; d++ ) {
+    for( d = 0; d < 4 * inner_iters; d++ ) {
         const int p0 = pix[-1*xstride];
         const int p1 = pix[-2*xstride];
         const int q0 = pix[0];
@@ -265,11 +277,15 @@ static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t
 }
 static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
+    h264_loop_filter_chroma_intra_c(pix, stride, 1, 2, alpha, beta);
 }
 static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
+    h264_loop_filter_chroma_intra_c(pix, 1, stride, 2, alpha, beta);
+}
+static void h264_h_loop_filter_chroma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, 1, stride, 1, alpha, beta);
 }
 
 void ff_h264dsp_init(H264DSPContext *c)
@@ -307,12 +323,16 @@ void ff_h264dsp_init(H264DSPContext *c)
 
     c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
     c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
+    c->h264_h_loop_filter_luma_mbaff= h264_h_loop_filter_luma_mbaff_c;
     c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
     c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
+    c->h264_h_loop_filter_luma_mbaff_intra= h264_h_loop_filter_luma_mbaff_intra_c;
     c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
     c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
+    c->h264_h_loop_filter_chroma_mbaff= h264_h_loop_filter_chroma_mbaff_c;
     c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
     c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
+    c->h264_h_loop_filter_chroma_mbaff_intra= h264_h_loop_filter_chroma_mbaff_intra_c;
     c->h264_loop_filter_strength= NULL;
 
     if (ARCH_ARM) ff_h264dsp_init_arm(c);
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 6fa1746b89..9db4c13784 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -45,13 +45,17 @@ typedef struct H264DSPContext{
     /* loop filter */
     void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
     void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
     /* v/h_loop_filter_luma_intra: align 16 */
     void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
     void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+    void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta);
     void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
     void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
     void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
     void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
+    void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
     // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
     void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                                       int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
-- 
cgit v1.2.3


From e39e3abad4ce667f06d41097428695a7aad095fb Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Sun, 10 Apr 2011 22:42:36 +0200
Subject: Choose h264 chroma dc dequant function dynamically.

Needed for high bit depth h264 decoding.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/dsputil.h  |  1 +
 libavcodec/h264.c     | 29 ++++-------------------------
 libavcodec/h264dsp.c  |  1 +
 libavcodec/h264dsp.h  |  1 +
 libavcodec/h264idct.c | 22 ++++++++++++++++++++++
 5 files changed, 29 insertions(+), 25 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 8d8bc29dd5..9b4aef758a 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -64,6 +64,7 @@ void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *bl
 void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 
+void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul);
 void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 501777049f..142a07807a 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -288,27 +288,6 @@ static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
 #undef xStride
 #undef stride
 
-static void chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
-    const int stride= 16*2;
-    const int xStride= 16;
-    int a,b,c,d,e;
-
-    a= block[stride*0 + xStride*0];
-    b= block[stride*0 + xStride*1];
-    c= block[stride*1 + xStride*0];
-    d= block[stride*1 + xStride*1];
-
-    e= a-b;
-    a= a+b;
-    b= c-d;
-    c= c+d;
-
-    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
-    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
-    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
-    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
-}
-
 #if 0
 static void chroma_dc_dct_c(DCTELEM *block){
     const int stride= 16*2;
@@ -1286,15 +1265,15 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
             }else{
                 if(is_h264){
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
-                        chroma_dc_dequant_idct_c(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
-                        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
                 }else{
-                    chroma_dc_dequant_idct_c(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
-                    chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     for(i=16; i<16+8; i++){
                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 1ef6a26db4..b555ed343a 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -299,6 +299,7 @@ void ff_h264dsp_init(H264DSPContext *c)
     c->h264_idct_add8      = ff_h264_idct_add8_c;
     c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
     c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c;
+    c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c;
 
     c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
     c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 9db4c13784..7eb50be342 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -72,6 +72,7 @@ typedef struct H264DSPContext{
     void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
     void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
     void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul);
+    void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
 }H264DSPContext;
 
 void ff_h264dsp_init(H264DSPContext *c);
diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index ecf669ebd0..790f1883fd 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -250,4 +250,26 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul){
         output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
         output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
     }
+#undef stride
+}
+
+void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
+    const int stride= 16*2;
+    const int xStride= 16;
+    int a,b,c,d,e;
+
+    a= block[stride*0 + xStride*0];
+    b= block[stride*0 + xStride*1];
+    c= block[stride*1 + xStride*0];
+    d= block[stride*1 + xStride*1];
+
+    e= a-b;
+    a= a+b;
+    b= c-d;
+    c= c+d;
+
+    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
+    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
+    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
+    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
 }
-- 
cgit v1.2.3


From 42239ced656ddddb85eb9655e8a66d29065f0dad Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:47 +0200
Subject: Add pixel formats for 9- and 10-bit yuv420p.

Also add support for these formats in libswscale.

Needed for high bit depth h264 decoding.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/utils.c            |  4 ++++
 libavutil/pixdesc.c           | 46 +++++++++++++++++++++++++++++++++++++++++++
 libavutil/pixfmt.h            |  6 ++++++
 libswscale/swscale.c          | 23 +++++++++++++++++++++-
 libswscale/swscale_internal.h | 10 ++++++++++
 libswscale/swscale_template.c | 34 ++++++++++++++++++++++++++++++++
 libswscale/utils.c            |  4 ++++
 7 files changed, 126 insertions(+), 1 deletion(-)

(limited to 'libavcodec')

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 6c10dd32c0..e356ca08cb 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -136,6 +136,10 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int l
     case PIX_FMT_YUVJ440P:
     case PIX_FMT_YUVJ444P:
     case PIX_FMT_YUVA420P:
+    case PIX_FMT_YUV420P9LE:
+    case PIX_FMT_YUV420P9BE:
+    case PIX_FMT_YUV420P10LE:
+    case PIX_FMT_YUV420P10BE:
         w_align= 16; //FIXME check for non mpeg style codecs and use less alignment
         h_align= 16;
         if(s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id == CODEC_ID_AMV || s->codec_id == CODEC_ID_THP || s->codec_id == CODEC_ID_H264)
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 16a26da3da..a291141436 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -740,6 +740,52 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
         .log2_chroma_h = 1,
         .flags = PIX_FMT_HWACCEL,
     },
+    [PIX_FMT_YUV420P9LE] = {
+        .name = "yuv420p9le",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,1,1,0,8},        /* Y */
+            {1,1,1,0,8},        /* U */
+            {2,1,1,0,8},        /* V */
+        },
+    },
+    [PIX_FMT_YUV420P9BE] = {
+        .name = "yuv420p9be",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,1,1,0,8},        /* Y */
+            {1,1,1,0,8},        /* U */
+            {2,1,1,0,8},        /* V */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_YUV420P10LE] = {
+        .name = "yuv420p10le",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,1,1,0,9},        /* Y */
+            {1,1,1,0,9},        /* U */
+            {2,1,1,0,9},        /* V */
+        },
+    },
+    [PIX_FMT_YUV420P10BE] = {
+        .name = "yuv420p10be",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,1,1,0,9},        /* Y */
+            {1,1,1,0,9},        /* U */
+            {2,1,1,0,9},        /* V */
+        },
+        .flags = PIX_FMT_BE,
+    },
     [PIX_FMT_YUV420P16LE] = {
         .name = "yuv420p16le",
         .nb_components= 3,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index dcdf4af4fb..fafbf9be1a 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -135,6 +135,10 @@ enum PixelFormat {
     PIX_FMT_Y400A,     ///< 8bit gray, 8bit alpha
     PIX_FMT_BGR48BE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian
     PIX_FMT_BGR48LE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian
+    PIX_FMT_YUV420P9BE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+    PIX_FMT_YUV420P9LE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+    PIX_FMT_YUV420P10BE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+    PIX_FMT_YUV420P10LE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
     PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
 };
 
@@ -159,6 +163,8 @@ enum PixelFormat {
 #define PIX_FMT_BGR555 PIX_FMT_NE(BGR555BE, BGR555LE)
 #define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
 
+#define PIX_FMT_YUV420P9  PIX_FMT_NE(YUV420P9BE , YUV420P9LE)
+#define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
 #define PIX_FMT_YUV420P16 PIX_FMT_NE(YUV420P16BE, YUV420P16LE)
 #define PIX_FMT_YUV422P16 PIX_FMT_NE(YUV422P16BE, YUV422P16LE)
 #define PIX_FMT_YUV444P16 PIX_FMT_NE(YUV444P16BE, YUV444P16LE)
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 93c6a13b9d..2830f26ce5 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1669,7 +1669,28 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
                 length*=2;
             fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
         } else {
-            if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
+            if(isNBPS(c->srcFormat)) {
+                const int depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
+                uint16_t *srcPtr2 = (uint16_t*)srcPtr;
+
+                if (is16BPS(c->dstFormat)) {
+                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
+                    for (i = 0; i < height; i++) {
+                        for (j = 0; j < length; j++)
+                            dstPtr2[j] = (srcPtr2[j]<<(16-depth)) | (srcPtr2[j]>>(2*depth-16));
+                        dstPtr2 += dstStride[plane]/2;
+                        srcPtr2 += srcStride[plane]/2;
+                    }
+                } else {
+                    // FIXME Maybe dither instead.
+                    for (i = 0; i < height; i++) {
+                        for (j = 0; j < length; j++)
+                            dstPtr[j] = srcPtr2[j]>>(depth-8);
+                        dstPtr  += dstStride[plane];
+                        srcPtr2 += srcStride[plane]/2;
+                    }
+                }
+            } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
                 if (!isBE(c->srcFormat)) srcPtr++;
                 for (i=0; i<height; i++) {
                     for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 292e42fa0d..2d40215ea9 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -354,6 +354,12 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_YUV422P16BE \
         || (x)==PIX_FMT_YUV444P16BE \
     )
+#define isNBPS(x)       (           \
+           (x)==PIX_FMT_YUV420P9LE  \
+        || (x)==PIX_FMT_YUV420P9BE  \
+        || (x)==PIX_FMT_YUV420P10LE \
+        || (x)==PIX_FMT_YUV420P10BE \
+    )
 #define isBE(x) ((x)&1)
 #define isPlanar8YUV(x) (           \
            (x)==PIX_FMT_YUV410P     \
@@ -368,9 +374,13 @@ const char *sws_format_name(enum PixelFormat format);
     )
 #define isPlanarYUV(x)  (           \
         isPlanar8YUV(x)             \
+        || (x)==PIX_FMT_YUV420P9LE  \
+        || (x)==PIX_FMT_YUV420P10LE \
         || (x)==PIX_FMT_YUV420P16LE \
         || (x)==PIX_FMT_YUV422P16LE \
         || (x)==PIX_FMT_YUV444P16LE \
+        || (x)==PIX_FMT_YUV420P9BE  \
+        || (x)==PIX_FMT_YUV420P10BE \
         || (x)==PIX_FMT_YUV420P16BE \
         || (x)==PIX_FMT_YUV422P16BE \
         || (x)==PIX_FMT_YUV444P16BE \
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 234e61871b..81a8d66277 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -225,6 +225,32 @@ static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
 
+// FIXME Maybe dither instead.
+#define YUV_NBPS(depth) \
+static inline void yuv ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                                          const uint8_t *_srcU, const uint8_t *_srcV, \
+                                          long width, uint32_t *unused) \
+{ \
+    int i; \
+    const uint16_t *srcU = (const uint16_t*)_srcU; \
+    const uint16_t *srcV = (const uint16_t*)_srcV; \
+    for (i = 0; i < width; i++) { \
+        dstU[i] = srcU[i]>>(depth-8); \
+        dstV[i] = srcV[i]>>(depth-8); \
+    } \
+} \
+\
+static inline void yuv ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \
+{ \
+    int i; \
+    const uint16_t *srcY = (const uint16_t*)_srcY; \
+    for (i = 0; i < width; i++) \
+        dstY[i] = srcY[i]>>(depth-8); \
+} \
+
+YUV_NBPS( 9)
+YUV_NBPS(10)
+
 static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                               long width, uint32_t *unused)
 {
@@ -790,6 +816,10 @@ static void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_PAL8     :
         case PIX_FMT_BGR4_BYTE:
         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
+        case PIX_FMT_YUV420P9BE:
+        case PIX_FMT_YUV420P9LE: c->chrToYV12 = yuv9ToUV_c; break;
+        case PIX_FMT_YUV420P10BE:
+        case PIX_FMT_YUV420P10LE: c->chrToYV12 = yuv10ToUV_c; break;
         case PIX_FMT_YUV420P16BE:
         case PIX_FMT_YUV422P16BE:
         case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
@@ -836,6 +866,10 @@ static void sws_init_swScale_c(SwsContext *c)
     c->lumToYV12 = NULL;
     c->alpToYV12 = NULL;
     switch (srcFormat) {
+    case PIX_FMT_YUV420P9BE:
+    case PIX_FMT_YUV420P9LE: c->lumToYV12 = yuv9ToY_c; break;
+    case PIX_FMT_YUV420P10BE:
+    case PIX_FMT_YUV420P10LE: c->lumToYV12 = yuv10ToY_c; break;
     case PIX_FMT_YUYV422  :
     case PIX_FMT_YUV420P16BE:
     case PIX_FMT_YUV422P16BE:
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 5f5eb322bf..4f9f269731 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -106,9 +106,13 @@ const char *swscale_license(void)
         || (x)==PIX_FMT_YUV440P     \
         || (x)==PIX_FMT_MONOWHITE   \
         || (x)==PIX_FMT_MONOBLACK   \
+        || (x)==PIX_FMT_YUV420P9LE    \
+        || (x)==PIX_FMT_YUV420P10LE   \
         || (x)==PIX_FMT_YUV420P16LE   \
         || (x)==PIX_FMT_YUV422P16LE   \
         || (x)==PIX_FMT_YUV444P16LE   \
+        || (x)==PIX_FMT_YUV420P9BE    \
+        || (x)==PIX_FMT_YUV420P10BE   \
         || (x)==PIX_FMT_YUV420P16BE   \
         || (x)==PIX_FMT_YUV422P16BE   \
         || (x)==PIX_FMT_YUV444P16BE   \
-- 
cgit v1.2.3


From 563c72dabb7ef34b667836c1b0e2008c08e89fa6 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Mon, 9 May 2011 10:58:40 -0400
Subject: Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264dsp.c          | 259 +-------------------------------------
 libavcodec/h264dsp_template.c | 285 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 286 insertions(+), 258 deletions(-)
 create mode 100644 libavcodec/h264dsp_template.c

(limited to 'libavcodec')

diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index b555ed343a..22b2086243 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -29,264 +29,7 @@
 #include "avcodec.h"
 #include "h264dsp.h"
 
-#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
-#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
-#define H264_WEIGHT(W,H) \
-static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
-    int y; \
-    offset <<= log2_denom; \
-    if(log2_denom) offset += 1<<(log2_denom-1); \
-    for(y=0; y<H; y++, block += stride){ \
-        op_scale1(0); \
-        op_scale1(1); \
-        if(W==2) continue; \
-        op_scale1(2); \
-        op_scale1(3); \
-        if(W==4) continue; \
-        op_scale1(4); \
-        op_scale1(5); \
-        op_scale1(6); \
-        op_scale1(7); \
-        if(W==8) continue; \
-        op_scale1(8); \
-        op_scale1(9); \
-        op_scale1(10); \
-        op_scale1(11); \
-        op_scale1(12); \
-        op_scale1(13); \
-        op_scale1(14); \
-        op_scale1(15); \
-    } \
-} \
-static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
-    int y; \
-    offset = ((offset + 1) | 1) << log2_denom; \
-    for(y=0; y<H; y++, dst += stride, src += stride){ \
-        op_scale2(0); \
-        op_scale2(1); \
-        if(W==2) continue; \
-        op_scale2(2); \
-        op_scale2(3); \
-        if(W==4) continue; \
-        op_scale2(4); \
-        op_scale2(5); \
-        op_scale2(6); \
-        op_scale2(7); \
-        if(W==8) continue; \
-        op_scale2(8); \
-        op_scale2(9); \
-        op_scale2(10); \
-        op_scale2(11); \
-        op_scale2(12); \
-        op_scale2(13); \
-        op_scale2(14); \
-        op_scale2(15); \
-    } \
-}
-
-H264_WEIGHT(16,16)
-H264_WEIGHT(16,8)
-H264_WEIGHT(8,16)
-H264_WEIGHT(8,8)
-H264_WEIGHT(8,4)
-H264_WEIGHT(4,8)
-H264_WEIGHT(4,4)
-H264_WEIGHT(4,2)
-H264_WEIGHT(2,4)
-H264_WEIGHT(2,2)
-
-#undef op_scale1
-#undef op_scale2
-#undef H264_WEIGHT
-
-static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
-{
-    int i, d;
-    for( i = 0; i < 4; i++ ) {
-        if( tc0[i] < 0 ) {
-            pix += inner_iters*ystride;
-            continue;
-        }
-        for( d = 0; d < inner_iters; d++ ) {
-            const int p0 = pix[-1*xstride];
-            const int p1 = pix[-2*xstride];
-            const int p2 = pix[-3*xstride];
-            const int q0 = pix[0];
-            const int q1 = pix[1*xstride];
-            const int q2 = pix[2*xstride];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
-
-                int tc = tc0[i];
-                int i_delta;
-
-                if( FFABS( p2 - p0 ) < beta ) {
-                    if(tc0[i])
-                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
-                    tc++;
-                }
-                if( FFABS( q2 - q0 ) < beta ) {
-                    if(tc0[i])
-                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
-                    tc++;
-                }
-
-                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
-                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
-            }
-            pix += ystride;
-        }
-    }
-}
-static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_luma_c(pix, stride, 1, 4, alpha, beta, tc0);
-}
-static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_luma_c(pix, 1, stride, 4, alpha, beta, tc0);
-}
-static void h264_h_loop_filter_luma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_luma_c(pix, 1, stride, 2, alpha, beta, tc0);
-}
-
-static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
-{
-    int d;
-    for( d = 0; d < 4 * inner_iters; d++ ) {
-        const int p2 = pix[-3*xstride];
-        const int p1 = pix[-2*xstride];
-        const int p0 = pix[-1*xstride];
-
-        const int q0 = pix[ 0*xstride];
-        const int q1 = pix[ 1*xstride];
-        const int q2 = pix[ 2*xstride];
-
-        if( FFABS( p0 - q0 ) < alpha &&
-            FFABS( p1 - p0 ) < beta &&
-            FFABS( q1 - q0 ) < beta ) {
-
-            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
-                if( FFABS( p2 - p0 ) < beta)
-                {
-                    const int p3 = pix[-4*xstride];
-                    /* p0', p1', p2' */
-                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
-                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
-                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
-                } else {
-                    /* p0' */
-                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
-                }
-                if( FFABS( q2 - q0 ) < beta)
-                {
-                    const int q3 = pix[3*xstride];
-                    /* q0', q1', q2' */
-                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
-                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
-                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
-                } else {
-                    /* q0' */
-                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
-                }
-            }else{
-                /* p0', q0' */
-                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
-                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
-            }
-        }
-        pix += ystride;
-    }
-}
-static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_luma_intra_c(pix, stride, 1, 4, alpha, beta);
-}
-static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_luma_intra_c(pix, 1, stride, 4, alpha, beta);
-}
-static void h264_h_loop_filter_luma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_luma_intra_c(pix, 1, stride, 2, alpha, beta);
-}
-
-static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
-{
-    int i, d;
-    for( i = 0; i < 4; i++ ) {
-        const int tc = tc0[i];
-        if( tc <= 0 ) {
-            pix += inner_iters*ystride;
-            continue;
-        }
-        for( d = 0; d < inner_iters; d++ ) {
-            const int p0 = pix[-1*xstride];
-            const int p1 = pix[-2*xstride];
-            const int q0 = pix[0];
-            const int q1 = pix[1*xstride];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
-
-                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-
-                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
-                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
-            }
-            pix += ystride;
-        }
-    }
-}
-static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_chroma_c(pix, stride, 1, 2, alpha, beta, tc0);
-}
-static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_chroma_c(pix, 1, stride, 2, alpha, beta, tc0);
-}
-static void h264_h_loop_filter_chroma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_chroma_c(pix, 1, stride, 1, alpha, beta, tc0);
-}
-
-static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
-{
-    int d;
-    for( d = 0; d < 4 * inner_iters; d++ ) {
-        const int p0 = pix[-1*xstride];
-        const int p1 = pix[-2*xstride];
-        const int q0 = pix[0];
-        const int q1 = pix[1*xstride];
-
-        if( FFABS( p0 - q0 ) < alpha &&
-            FFABS( p1 - p0 ) < beta &&
-            FFABS( q1 - q0 ) < beta ) {
-
-            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
-            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
-        }
-        pix += ystride;
-    }
-}
-static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_chroma_intra_c(pix, stride, 1, 2, alpha, beta);
-}
-static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_chroma_intra_c(pix, 1, stride, 2, alpha, beta);
-}
-static void h264_h_loop_filter_chroma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_chroma_intra_c(pix, 1, stride, 1, alpha, beta);
-}
+#include "h264dsp_template.c"
 
 void ff_h264dsp_init(H264DSPContext *c)
 {
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
new file mode 100644
index 0000000000..2fe09ff56b
--- /dev/null
+++ b/libavcodec/h264dsp_template.c
@@ -0,0 +1,285 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG4 part10 DSP functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
+#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
+#define H264_WEIGHT(W,H) \
+static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+    int y; \
+    offset <<= log2_denom; \
+    if(log2_denom) offset += 1<<(log2_denom-1); \
+    for(y=0; y<H; y++, block += stride){ \
+        op_scale1(0); \
+        op_scale1(1); \
+        if(W==2) continue; \
+        op_scale1(2); \
+        op_scale1(3); \
+        if(W==4) continue; \
+        op_scale1(4); \
+        op_scale1(5); \
+        op_scale1(6); \
+        op_scale1(7); \
+        if(W==8) continue; \
+        op_scale1(8); \
+        op_scale1(9); \
+        op_scale1(10); \
+        op_scale1(11); \
+        op_scale1(12); \
+        op_scale1(13); \
+        op_scale1(14); \
+        op_scale1(15); \
+    } \
+} \
+static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+    int y; \
+    offset = ((offset + 1) | 1) << log2_denom; \
+    for(y=0; y<H; y++, dst += stride, src += stride){ \
+        op_scale2(0); \
+        op_scale2(1); \
+        if(W==2) continue; \
+        op_scale2(2); \
+        op_scale2(3); \
+        if(W==4) continue; \
+        op_scale2(4); \
+        op_scale2(5); \
+        op_scale2(6); \
+        op_scale2(7); \
+        if(W==8) continue; \
+        op_scale2(8); \
+        op_scale2(9); \
+        op_scale2(10); \
+        op_scale2(11); \
+        op_scale2(12); \
+        op_scale2(13); \
+        op_scale2(14); \
+        op_scale2(15); \
+    } \
+}
+
+H264_WEIGHT(16,16)
+H264_WEIGHT(16,8)
+H264_WEIGHT(8,16)
+H264_WEIGHT(8,8)
+H264_WEIGHT(8,4)
+H264_WEIGHT(4,8)
+H264_WEIGHT(4,4)
+H264_WEIGHT(4,2)
+H264_WEIGHT(2,4)
+H264_WEIGHT(2,2)
+
+#undef op_scale1
+#undef op_scale2
+#undef H264_WEIGHT
+
+static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+{
+    int i, d;
+    for( i = 0; i < 4; i++ ) {
+        if( tc0[i] < 0 ) {
+            pix += inner_iters*ystride;
+            continue;
+        }
+        for( d = 0; d < inner_iters; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int p2 = pix[-3*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+            const int q2 = pix[2*xstride];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                int tc = tc0[i];
+                int i_delta;
+
+                if( FFABS( p2 - p0 ) < beta ) {
+                    if(tc0[i])
+                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+                if( FFABS( q2 - q0 ) < beta ) {
+                    if(tc0[i])
+                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+
+                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, stride, 1, 4, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, 1, stride, 4, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_luma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, 1, stride, 2, alpha, beta, tc0);
+}
+
+static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+{
+    int d;
+    for( d = 0; d < 4 * inner_iters; d++ ) {
+        const int p2 = pix[-3*xstride];
+        const int p1 = pix[-2*xstride];
+        const int p0 = pix[-1*xstride];
+
+        const int q0 = pix[ 0*xstride];
+        const int q1 = pix[ 1*xstride];
+        const int q2 = pix[ 2*xstride];
+
+        if( FFABS( p0 - q0 ) < alpha &&
+            FFABS( p1 - p0 ) < beta &&
+            FFABS( q1 - q0 ) < beta ) {
+
+            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                if( FFABS( p2 - p0 ) < beta)
+                {
+                    const int p3 = pix[-4*xstride];
+                    /* p0', p1', p2' */
+                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                } else {
+                    /* p0' */
+                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                }
+                if( FFABS( q2 - q0 ) < beta)
+                {
+                    const int q3 = pix[3*xstride];
+                    /* q0', q1', q2' */
+                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                } else {
+                    /* q0' */
+                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+            }else{
+                /* p0', q0' */
+                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+            }
+        }
+        pix += ystride;
+    }
+}
+static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_luma_intra_c(pix, stride, 1, 4, alpha, beta);
+}
+static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_luma_intra_c(pix, 1, stride, 4, alpha, beta);
+}
+static void h264_h_loop_filter_luma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_luma_intra_c(pix, 1, stride, 2, alpha, beta);
+}
+
+static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+{
+    int i, d;
+    for( i = 0; i < 4; i++ ) {
+        const int tc = tc0[i];
+        if( tc <= 0 ) {
+            pix += inner_iters*ystride;
+            continue;
+        }
+        for( d = 0; d < inner_iters; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+
+                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
+                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, stride, 1, 2, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, 1, stride, 2, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_chroma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, 1, stride, 1, alpha, beta, tc0);
+}
+
+static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+{
+    int d;
+    for( d = 0; d < 4 * inner_iters; d++ ) {
+        const int p0 = pix[-1*xstride];
+        const int p1 = pix[-2*xstride];
+        const int q0 = pix[0];
+        const int q1 = pix[1*xstride];
+
+        if( FFABS( p0 - q0 ) < alpha &&
+            FFABS( p1 - p0 ) < beta &&
+            FFABS( q1 - q0 ) < beta ) {
+
+            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
+            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+        }
+        pix += ystride;
+    }
+}
+static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, stride, 1, 2, alpha, beta);
+}
+static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, 1, stride, 2, alpha, beta);
+}
+static void h264_h_loop_filter_chroma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, 1, stride, 1, alpha, beta);
+}
-- 
cgit v1.2.3


From 5ada25245d4d74bdfc99b363934fe9a3de467eeb Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:49 +0200
Subject: Move the functions in h264pred.c into a new file h264pred_template.c.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264pred.c          | 897 +---------------------------------------
 libavcodec/h264pred_template.c | 912 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 917 insertions(+), 892 deletions(-)
 create mode 100644 libavcodec/h264pred_template.c

(limited to 'libavcodec')

diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 1a4ab81402..616a7a93b1 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -25,108 +25,17 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#include "avcodec.h"
-#include "mpegvideo.h"
 #include "h264pred.h"
-#include "mathops.h"
-
-static void pred4x4_vertical_c(uint8_t *src, const uint8_t *topright, int stride){
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    ((uint32_t*)(src+0*stride))[0]= a;
-    ((uint32_t*)(src+1*stride))[0]= a;
-    ((uint32_t*)(src+2*stride))[0]= a;
-    ((uint32_t*)(src+3*stride))[0]= a;
-}
-
-static void pred4x4_horizontal_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
-    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
-    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
-    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
-}
-
-static void pred4x4_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
-                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
-
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_left_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
-
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_top_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
-
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_128_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
-}
-
-static void pred4x4_127_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= 127U*0x01010101U;
-}
-
-static void pred4x4_129_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= 129U*0x01010101U;
-}
-
-
-#define LOAD_TOP_RIGHT_EDGE\
-    const int av_unused t4= topright[0];\
-    const int av_unused t5= topright[1];\
-    const int av_unused t6= topright[2];\
-    const int av_unused t7= topright[3];\
-
-#define LOAD_DOWN_LEFT_EDGE\
-    const int av_unused l4= src[-1+4*stride];\
-    const int av_unused l5= src[-1+5*stride];\
-    const int av_unused l6= src[-1+6*stride];\
-    const int av_unused l7= src[-1+7*stride];\
-
-#define LOAD_LEFT_EDGE\
-    const int av_unused l0= src[-1+0*stride];\
-    const int av_unused l1= src[-1+1*stride];\
-    const int av_unused l2= src[-1+2*stride];\
-    const int av_unused l3= src[-1+3*stride];\
-
-#define LOAD_TOP_EDGE\
-    const int av_unused t0= src[ 0-1*stride];\
-    const int av_unused t1= src[ 1-1*stride];\
-    const int av_unused t2= src[ 2-1*stride];\
-    const int av_unused t3= src[ 3-1*stride];\
+#include "h264pred_template.c"
 
 static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
     const int lt= src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
     uint32_t v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
-                            (t0 + 2*t1 + t2 + 2) >> 2,
-                            (t1 + 2*t2 + t3 + 2) >> 2,
-                            (t2 + 2*t3 + t4 + 2) >> 2);
+                          (t0 + 2*t1 + t2 + 2) >> 2,
+                          (t1 + 2*t2 + t3 + 2) >> 2,
+                          (t2 + 2*t3 + t4 + 2) >> 2);
 
     AV_WN32A(src+0*stride, v);
     AV_WN32A(src+1*stride, v);
@@ -144,52 +53,6 @@ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int
     AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
 }
 
-static void pred4x4_down_right_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE
-    LOAD_LEFT_EDGE
-
-    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
-    src[0+2*stride]=
-    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
-    src[0+1*stride]=
-    src[1+2*stride]=
-    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
-    src[0+0*stride]=
-    src[1+1*stride]=
-    src[2+2*stride]=
-    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
-    src[1+0*stride]=
-    src[2+1*stride]=
-    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
-    src[2+0*stride]=
-    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-}
-
-static void pred4x4_down_left_c(uint8_t *src, const uint8_t *topright, int stride){
-    LOAD_TOP_EDGE
-    LOAD_TOP_RIGHT_EDGE
-//    LOAD_LEFT_EDGE
-
-    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
-    src[1+0*stride]=
-    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
-    src[2+0*stride]=
-    src[1+1*stride]=
-    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
-    src[3+0*stride]=
-    src[2+1*stride]=
-    src[1+2*stride]=
-    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
-    src[3+1*stride]=
-    src[2+2*stride]=
-    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
-    src[3+2*stride]=
-    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
-    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
-}
-
 static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
@@ -261,53 +124,8 @@ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *toprigh
     src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
 }
 
-static void pred4x4_vertical_right_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE
-    LOAD_LEFT_EDGE
-
-    src[0+0*stride]=
-    src[1+2*stride]=(lt + t0 + 1)>>1;
-    src[1+0*stride]=
-    src[2+2*stride]=(t0 + t1 + 1)>>1;
-    src[2+0*stride]=
-    src[3+2*stride]=(t1 + t2 + 1)>>1;
-    src[3+0*stride]=(t2 + t3 + 1)>>1;
-    src[0+1*stride]=
-    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
-    src[1+1*stride]=
-    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
-    src[2+1*stride]=
-    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
-    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-}
-
-static void pred4x4_vertical_left_c(uint8_t *src, const uint8_t *topright, int stride){
-    LOAD_TOP_EDGE
-    LOAD_TOP_RIGHT_EDGE
-
-    src[0+0*stride]=(t0 + t1 + 1)>>1;
-    src[1+0*stride]=
-    src[0+2*stride]=(t1 + t2 + 1)>>1;
-    src[2+0*stride]=
-    src[1+2*stride]=(t2 + t3 + 1)>>1;
-    src[3+0*stride]=
-    src[2+2*stride]=(t3 + t4+ 1)>>1;
-    src[3+2*stride]=(t4 + t5+ 1)>>1;
-    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[1+1*stride]=
-    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-    src[2+1*stride]=
-    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
-    src[3+1*stride]=
-    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
-    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
-}
-
 static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride,
-                                      const int l0, const int l1, const int l2, const int l3, const int l4){
+                                       const int l0, const int l1, const int l2, const int l3, const int l4){
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 
@@ -364,27 +182,6 @@ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, i
     src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
 }
 
-static void pred4x4_horizontal_up_c(uint8_t *src, const uint8_t *topright, int stride){
-    LOAD_LEFT_EDGE
-
-    src[0+0*stride]=(l0 + l1 + 1)>>1;
-    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-    src[2+0*stride]=
-    src[0+1*stride]=(l1 + l2 + 1)>>1;
-    src[3+0*stride]=
-    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
-    src[2+1*stride]=
-    src[0+2*stride]=(l2 + l3 + 1)>>1;
-    src[3+1*stride]=
-    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
-    src[3+2*stride]=
-    src[1+3*stride]=
-    src[0+3*stride]=
-    src[2+2*stride]=
-    src[2+3*stride]=
-    src[3+3*stride]=l3;
-}
-
 static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
     LOAD_LEFT_EDGE
     LOAD_DOWN_LEFT_EDGE
@@ -432,29 +229,6 @@ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *top
     src[3+3*stride]=l3;
 }
 
-static void pred4x4_horizontal_down_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE
-    LOAD_LEFT_EDGE
-
-    src[0+0*stride]=
-    src[2+1*stride]=(lt + l0 + 1)>>1;
-    src[1+0*stride]=
-    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
-    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
-    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[0+1*stride]=
-    src[2+2*stride]=(l0 + l1 + 1)>>1;
-    src[1+1*stride]=
-    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
-    src[0+2*stride]=
-    src[2+3*stride]=(l1 + l2+ 1)>>1;
-    src[1+2*stride]=
-    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-    src[0+3*stride]=(l2 + l3 + 1)>>1;
-    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
-}
-
 static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
@@ -470,166 +244,6 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
     }
 }
 
-static void pred16x16_vertical_c(uint8_t *src, int stride){
-    int i;
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    const uint32_t b= ((uint32_t*)(src-stride))[1];
-    const uint32_t c= ((uint32_t*)(src-stride))[2];
-    const uint32_t d= ((uint32_t*)(src-stride))[3];
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]= a;
-        ((uint32_t*)(src+i*stride))[1]= b;
-        ((uint32_t*)(src+i*stride))[2]= c;
-        ((uint32_t*)(src+i*stride))[3]= d;
-    }
-}
-
-static void pred16x16_horizontal_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
-    }
-}
-
-static void pred16x16_dc_c(uint8_t *src, int stride){
-    int i, dc=0;
-
-    for(i=0;i<16; i++){
-        dc+= src[-1+i*stride];
-    }
-
-    for(i=0;i<16; i++){
-        dc+= src[i-stride];
-    }
-
-    dc= 0x01010101*((dc + 16)>>5);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
-}
-
-static void pred16x16_left_dc_c(uint8_t *src, int stride){
-    int i, dc=0;
-
-    for(i=0;i<16; i++){
-        dc+= src[-1+i*stride];
-    }
-
-    dc= 0x01010101*((dc + 8)>>4);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
-}
-
-static void pred16x16_top_dc_c(uint8_t *src, int stride){
-    int i, dc=0;
-
-    for(i=0;i<16; i++){
-        dc+= src[i-stride];
-    }
-    dc= 0x01010101*((dc + 8)>>4);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
-}
-
-static void pred16x16_128_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
-    }
-}
-
-static void pred16x16_127_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*127U;
-    }
-}
-
-static void pred16x16_129_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*129U;
-    }
-}
-
-static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){
-  int i, j, k;
-  int a;
-  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-  const uint8_t * const src0 = src+7-stride;
-  const uint8_t *src1 = src+8*stride-1;
-  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
-  int H = src0[1] - src0[-1];
-  int V = src1[0] - src2[ 0];
-  for(k=2; k<=8; ++k) {
-    src1 += stride; src2 -= stride;
-    H += k*(src0[k] - src0[-k]);
-    V += k*(src1[0] - src2[ 0]);
-  }
-  if(svq3){
-    H = ( 5*(H/4) ) / 16;
-    V = ( 5*(V/4) ) / 16;
-
-    /* required for 100% accuracy */
-    i = H; H = V; V = i;
-  }else if(rv40){
-    H = ( H + (H>>2) ) >> 4;
-    V = ( V + (V>>2) ) >> 4;
-  }else{
-    H = ( 5*H+32 ) >> 6;
-    V = ( 5*V+32 ) >> 6;
-  }
-
-  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
-  for(j=16; j>0; --j) {
-    int b = a;
-    a += V;
-    for(i=-16; i<0; i+=4) {
-      src[16+i] = cm[ (b    ) >> 5 ];
-      src[17+i] = cm[ (b+  H) >> 5 ];
-      src[18+i] = cm[ (b+2*H) >> 5 ];
-      src[19+i] = cm[ (b+3*H) >> 5 ];
-      b += 4*H;
-    }
-    src += stride;
-  }
-}
-
-static void pred16x16_plane_c(uint8_t *src, int stride){
-    pred16x16_plane_compat_c(src, stride, 0, 0);
-}
-
 static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
     pred16x16_plane_compat_c(src, stride, 1, 0);
 }
@@ -665,74 +279,6 @@ static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_vertical_c(uint8_t *src, int stride){
-    int i;
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    const uint32_t b= ((uint32_t*)(src-stride))[1];
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= a;
-        ((uint32_t*)(src+i*stride))[1]= b;
-    }
-}
-
-static void pred8x8_horizontal_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
-    }
-}
-
-static void pred8x8_128_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
-    }
-}
-
-static void pred8x8_127_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*127U;
-    }
-}
-static void pred8x8_129_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*129U;
-    }
-}
-
-static void pred8x8_left_dc_c(uint8_t *src, int stride){
-    int i;
-    int dc0, dc2;
-
-    dc0=dc2=0;
-    for(i=0;i<4; i++){
-        dc0+= src[-1+i*stride];
-        dc2+= src[-1+(i+4)*stride];
-    }
-    dc0= 0x01010101*((dc0 + 2)>>2);
-    dc2= 0x01010101*((dc2 + 2)>>2);
-
-    for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= dc0;
-    }
-    for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= dc2;
-    }
-}
-
 static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
     int i;
     int dc0;
@@ -748,28 +294,6 @@ static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_top_dc_c(uint8_t *src, int stride){
-    int i;
-    int dc0, dc1;
-
-    dc0=dc1=0;
-    for(i=0;i<4; i++){
-        dc0+= src[i-stride];
-        dc1+= src[4+i-stride];
-    }
-    dc0= 0x01010101*((dc0 + 2)>>2);
-    dc1= 0x01010101*((dc1 + 2)>>2);
-
-    for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
-    }
-    for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
-    }
-}
-
 static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
     int i;
     int dc0;
@@ -785,55 +309,6 @@ static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-
-static void pred8x8_dc_c(uint8_t *src, int stride){
-    int i;
-    int dc0, dc1, dc2, dc3;
-
-    dc0=dc1=dc2=0;
-    for(i=0;i<4; i++){
-        dc0+= src[-1+i*stride] + src[i-stride];
-        dc1+= src[4+i-stride];
-        dc2+= src[-1+(i+4)*stride];
-    }
-    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
-    dc0= 0x01010101*((dc0 + 4)>>3);
-    dc1= 0x01010101*((dc1 + 2)>>2);
-    dc2= 0x01010101*((dc2 + 2)>>2);
-
-    for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
-    }
-    for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc2;
-        ((uint32_t*)(src+i*stride))[1]= dc3;
-    }
-}
-
-//the following 4 function should not be optimized!
-static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){
-    pred8x8_top_dc_c(src, stride);
-    pred4x4_dc_c(src, NULL, stride);
-}
-
-static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){
-    pred8x8_dc_c(src, stride);
-    pred4x4_top_dc_c(src, NULL, stride);
-}
-
-static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){
-    pred8x8_left_dc_c(src, stride);
-    pred4x4_128_dc_c(src + 4*stride    , NULL, stride);
-    pred4x4_128_dc_c(src + 4*stride + 4, NULL, stride);
-}
-
-static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){
-    pred8x8_left_dc_c(src, stride);
-    pred4x4_128_dc_c(src    , NULL, stride);
-    pred4x4_128_dc_c(src + 4, NULL, stride);
-}
-
 static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
     int i;
     int dc0=0;
@@ -855,39 +330,6 @@ static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_plane_c(uint8_t *src, int stride){
-  int j, k;
-  int a;
-  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-  const uint8_t * const src0 = src+3-stride;
-  const uint8_t *src1 = src+4*stride-1;
-  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
-  int H = src0[1] - src0[-1];
-  int V = src1[0] - src2[ 0];
-  for(k=2; k<=4; ++k) {
-    src1 += stride; src2 -= stride;
-    H += k*(src0[k] - src0[-k]);
-    V += k*(src1[0] - src2[ 0]);
-  }
-  H = ( 17*H+16 ) >> 5;
-  V = ( 17*V+16 ) >> 5;
-
-  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
-  for(j=8; j>0; --j) {
-    int b = a;
-    a += V;
-    src[0] = cm[ (b    ) >> 5 ];
-    src[1] = cm[ (b+  H) >> 5 ];
-    src[2] = cm[ (b+2*H) >> 5 ];
-    src[3] = cm[ (b+3*H) >> 5 ];
-    src[4] = cm[ (b+4*H) >> 5 ];
-    src[5] = cm[ (b+5*H) >> 5 ];
-    src[6] = cm[ (b+6*H) >> 5 ];
-    src[7] = cm[ (b+7*H) >> 5 ];
-    src += stride;
-  }
-}
-
 static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
@@ -907,335 +349,6 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
     }
 }
 
-#define SRC(x,y) src[(x)+(y)*stride]
-#define PL(y) \
-    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_LEFT \
-    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
-                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
-    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
-    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
-
-#define PT(x) \
-    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_TOP \
-    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
-                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
-    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
-    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
-                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
-
-#define PTR(x) \
-    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_TOPRIGHT \
-    int t8, t9, t10, t11, t12, t13, t14, t15; \
-    if(has_topright) { \
-        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
-        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
-    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
-
-#define PREDICT_8x8_LOAD_TOPLEFT \
-    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
-
-#define PREDICT_8x8_DC(v) \
-    int y; \
-    for( y = 0; y < 8; y++ ) { \
-        ((uint32_t*)src)[0] = \
-        ((uint32_t*)src)[1] = v; \
-        src += stride; \
-    }
-
-static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_DC(0x80808080);
-}
-static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_LEFT;
-    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
-    PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_TOP;
-    const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
-    PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_LEFT;
-    PREDICT_8x8_LOAD_TOP;
-    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
-                         +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
-    PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_LEFT;
-#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
-               ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
-    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
-#undef ROW
-}
-static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    int y;
-    PREDICT_8x8_LOAD_TOP;
-    src[0] = t0;
-    src[1] = t1;
-    src[2] = t2;
-    src[3] = t3;
-    src[4] = t4;
-    src[5] = t5;
-    src[6] = t6;
-    src[7] = t7;
-    for( y = 1; y < 8; y++ )
-        *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
-}
-static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_TOP;
-    PREDICT_8x8_LOAD_TOPRIGHT;
-    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
-    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
-    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
-    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
-    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
-    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
-    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
-    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
-    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
-    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
-    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
-    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
-    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
-    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
-    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
-}
-static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_TOP;
-    PREDICT_8x8_LOAD_LEFT;
-    PREDICT_8x8_LOAD_TOPLEFT;
-    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
-    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
-    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
-    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
-    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
-    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
-    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
-    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
-    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
-    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
-    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
-    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
-    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
-    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
-    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
-
-}
-static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_TOP;
-    PREDICT_8x8_LOAD_LEFT;
-    PREDICT_8x8_LOAD_TOPLEFT;
-    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
-    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
-    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
-    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
-    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
-    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
-    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
-    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
-    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
-    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
-    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
-    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
-    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
-    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
-    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
-    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
-    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
-    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
-    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
-    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
-    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
-    SRC(7,0)= (t6 + t7 + 1) >> 1;
-}
-static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_TOP;
-    PREDICT_8x8_LOAD_LEFT;
-    PREDICT_8x8_LOAD_TOPLEFT;
-    SRC(0,7)= (l6 + l7 + 1) >> 1;
-    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
-    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
-    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
-    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
-    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
-    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
-    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
-    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
-    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
-    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
-    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
-    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
-    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
-    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
-    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
-    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
-    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
-    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
-    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
-    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
-    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
-}
-static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_TOP;
-    PREDICT_8x8_LOAD_TOPRIGHT;
-    SRC(0,0)= (t0 + t1 + 1) >> 1;
-    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
-    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
-    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
-    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
-    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
-    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
-    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
-    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
-    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
-    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
-    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
-    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
-    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
-    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
-    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
-    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
-    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
-    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
-    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
-    SRC(7,6)= (t10 + t11 + 1) >> 1;
-    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
-}
-static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
-    PREDICT_8x8_LOAD_LEFT;
-    SRC(0,0)= (l0 + l1 + 1) >> 1;
-    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
-    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
-    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
-    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
-    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
-    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
-    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
-    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
-    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
-    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
-    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
-    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
-    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
-    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
-    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
-    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
-    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
-}
-#undef PREDICT_8x8_LOAD_LEFT
-#undef PREDICT_8x8_LOAD_TOP
-#undef PREDICT_8x8_LOAD_TOPLEFT
-#undef PREDICT_8x8_LOAD_TOPRIGHT
-#undef PREDICT_8x8_DC
-#undef PTR
-#undef PT
-#undef PL
-#undef SRC
-
-static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
-    int i;
-    pix -= stride;
-    for(i=0; i<4; i++){
-        uint8_t v = pix[0];
-        pix[1*stride]= v += block[0];
-        pix[2*stride]= v += block[4];
-        pix[3*stride]= v += block[8];
-        pix[4*stride]= v +  block[12];
-        pix++;
-        block++;
-    }
-}
-
-static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
-    int i;
-    for(i=0; i<4; i++){
-        uint8_t v = pix[-1];
-        pix[0]= v += block[0];
-        pix[1]= v += block[1];
-        pix[2]= v += block[2];
-        pix[3]= v +  block[3];
-        pix+= stride;
-        block+= 4;
-    }
-}
-
-static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
-    int i;
-    pix -= stride;
-    for(i=0; i<8; i++){
-        uint8_t v = pix[0];
-        pix[1*stride]= v += block[0];
-        pix[2*stride]= v += block[8];
-        pix[3*stride]= v += block[16];
-        pix[4*stride]= v += block[24];
-        pix[5*stride]= v += block[32];
-        pix[6*stride]= v += block[40];
-        pix[7*stride]= v += block[48];
-        pix[8*stride]= v +  block[56];
-        pix++;
-        block++;
-    }
-}
-
-static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
-    int i;
-    for(i=0; i<8; i++){
-        uint8_t v = pix[-1];
-        pix[0]= v += block[0];
-        pix[1]= v += block[1];
-        pix[2]= v += block[2];
-        pix[3]= v += block[3];
-        pix[4]= v += block[4];
-        pix[5]= v += block[5];
-        pix[6]= v += block[6];
-        pix[7]= v +  block[7];
-        pix+= stride;
-        block+= 8;
-    }
-}
-
-static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
-    int i;
-    for(i=0; i<16; i++)
-        pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
-}
-
-static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
-    int i;
-    for(i=0; i<16; i++)
-        pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
-}
-
-static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
-    int i;
-    for(i=0; i<4; i++)
-        pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
-}
-
-static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
-    int i;
-    for(i=0; i<4; i++)
-        pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
-}
-
-
 /**
  * Set the intra prediction function pointers.
  */
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
new file mode 100644
index 0000000000..7856315a03
--- /dev/null
+++ b/libavcodec/h264pred_template.c
@@ -0,0 +1,912 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "mathops.h"
+#include "dsputil.h"
+
+static void pred4x4_vertical_c(uint8_t *src, const uint8_t *topright, int stride){
+    const uint32_t a= ((uint32_t*)(src-stride))[0];
+    ((uint32_t*)(src+0*stride))[0]= a;
+    ((uint32_t*)(src+1*stride))[0]= a;
+    ((uint32_t*)(src+2*stride))[0]= a;
+    ((uint32_t*)(src+3*stride))[0]= a;
+}
+
+static void pred4x4_horizontal_c(uint8_t *src, const uint8_t *topright, int stride){
+    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
+    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
+    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
+    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
+}
+
+static void pred4x4_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
+                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+}
+
+static void pred4x4_left_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+}
+
+static void pred4x4_top_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+}
+
+static void pred4x4_128_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
+}
+
+static void pred4x4_127_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= 127U*0x01010101U;
+}
+
+static void pred4x4_129_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= 129U*0x01010101U;
+}
+
+
+#define LOAD_TOP_RIGHT_EDGE\
+    const int av_unused t4= topright[0];\
+    const int av_unused t5= topright[1];\
+    const int av_unused t6= topright[2];\
+    const int av_unused t7= topright[3];\
+
+#define LOAD_DOWN_LEFT_EDGE\
+    const int av_unused l4= src[-1+4*stride];\
+    const int av_unused l5= src[-1+5*stride];\
+    const int av_unused l6= src[-1+6*stride];\
+    const int av_unused l7= src[-1+7*stride];\
+
+#define LOAD_LEFT_EDGE\
+    const int av_unused l0= src[-1+0*stride];\
+    const int av_unused l1= src[-1+1*stride];\
+    const int av_unused l2= src[-1+2*stride];\
+    const int av_unused l3= src[-1+3*stride];\
+
+#define LOAD_TOP_EDGE\
+    const int av_unused t0= src[ 0-1*stride];\
+    const int av_unused t1= src[ 1-1*stride];\
+    const int av_unused t2= src[ 2-1*stride];\
+    const int av_unused t3= src[ 3-1*stride];\
+
+static void pred4x4_down_right_c(uint8_t *src, const uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
+    src[0+1*stride]=
+    src[1+2*stride]=
+    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
+    src[0+0*stride]=
+    src[1+1*stride]=
+    src[2+2*stride]=
+    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+0*stride]=
+    src[2+1*stride]=
+    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+0*stride]=
+    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+}
+
+static void pred4x4_down_left_c(uint8_t *src, const uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+//    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
+    src[1+0*stride]=
+    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
+    src[3+2*stride]=
+    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
+    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
+}
+
+static void pred4x4_vertical_right_c(uint8_t *src, const uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=
+    src[1+2*stride]=(lt + t0 + 1)>>1;
+    src[1+0*stride]=
+    src[2+2*stride]=(t0 + t1 + 1)>>1;
+    src[2+0*stride]=
+    src[3+2*stride]=(t1 + t2 + 1)>>1;
+    src[3+0*stride]=(t2 + t3 + 1)>>1;
+    src[0+1*stride]=
+    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+1*stride]=
+    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+1*stride]=
+    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+}
+
+static void pred4x4_vertical_left_c(uint8_t *src, const uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+
+    src[0+0*stride]=(t0 + t1 + 1)>>1;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4+ 1)>>1;
+    src[3+2*stride]=(t4 + t5+ 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+static void pred4x4_horizontal_up_c(uint8_t *src, const uint8_t *topright, int stride){
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(l0 + l1 + 1)>>1;
+    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[2+0*stride]=
+    src[0+1*stride]=(l1 + l2 + 1)>>1;
+    src[3+0*stride]=
+    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+    src[2+1*stride]=
+    src[0+2*stride]=(l2 + l3 + 1)>>1;
+    src[3+1*stride]=
+    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
+    src[3+2*stride]=
+    src[1+3*stride]=
+    src[0+3*stride]=
+    src[2+2*stride]=
+    src[2+3*stride]=
+    src[3+3*stride]=l3;
+}
+
+static void pred4x4_horizontal_down_c(uint8_t *src, const uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=
+    src[2+1*stride]=(lt + l0 + 1)>>1;
+    src[1+0*stride]=
+    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[0+1*stride]=
+    src[2+2*stride]=(l0 + l1 + 1)>>1;
+    src[1+1*stride]=
+    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[2+3*stride]=(l1 + l2+ 1)>>1;
+    src[1+2*stride]=
+    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[0+3*stride]=(l2 + l3 + 1)>>1;
+    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+}
+
+static void pred16x16_vertical_c(uint8_t *src, int stride){
+    int i;
+    const uint32_t a= ((uint32_t*)(src-stride))[0];
+    const uint32_t b= ((uint32_t*)(src-stride))[1];
+    const uint32_t c= ((uint32_t*)(src-stride))[2];
+    const uint32_t d= ((uint32_t*)(src-stride))[3];
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]= a;
+        ((uint32_t*)(src+i*stride))[1]= b;
+        ((uint32_t*)(src+i*stride))[2]= c;
+        ((uint32_t*)(src+i*stride))[3]= d;
+    }
+}
+
+static void pred16x16_horizontal_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
+    }
+}
+
+static void pred16x16_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride];
+    }
+
+    for(i=0;i<16; i++){
+        dc+= src[i-stride];
+    }
+
+    dc= 0x01010101*((dc + 16)>>5);
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= dc;
+    }
+}
+
+static void pred16x16_left_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride];
+    }
+
+    dc= 0x01010101*((dc + 8)>>4);
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= dc;
+    }
+}
+
+static void pred16x16_top_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+
+    for(i=0;i<16; i++){
+        dc+= src[i-stride];
+    }
+    dc= 0x01010101*((dc + 8)>>4);
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= dc;
+    }
+}
+
+static void pred16x16_128_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
+    }
+}
+
+static void pred16x16_127_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*127U;
+    }
+}
+
+static void pred16x16_129_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*129U;
+    }
+}
+
+static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){
+  int i, j, k;
+  int a;
+  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+  const uint8_t * const src0 = src+7-stride;
+  const uint8_t *src1 = src+8*stride-1;
+  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
+  int H = src0[1] - src0[-1];
+  int V = src1[0] - src2[ 0];
+  for(k=2; k<=8; ++k) {
+    src1 += stride; src2 -= stride;
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  if(svq3){
+    H = ( 5*(H/4) ) / 16;
+    V = ( 5*(V/4) ) / 16;
+
+    /* required for 100% accuracy */
+    i = H; H = V; V = i;
+  }else if(rv40){
+    H = ( H + (H>>2) ) >> 4;
+    V = ( V + (V>>2) ) >> 4;
+  }else{
+    H = ( 5*H+32 ) >> 6;
+    V = ( 5*V+32 ) >> 6;
+  }
+
+  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
+  for(j=16; j>0; --j) {
+    int b = a;
+    a += V;
+    for(i=-16; i<0; i+=4) {
+      src[16+i] = cm[ (b    ) >> 5 ];
+      src[17+i] = cm[ (b+  H) >> 5 ];
+      src[18+i] = cm[ (b+2*H) >> 5 ];
+      src[19+i] = cm[ (b+3*H) >> 5 ];
+      b += 4*H;
+    }
+    src += stride;
+  }
+}
+
+static void pred16x16_plane_c(uint8_t *src, int stride){
+    pred16x16_plane_compat_c(src, stride, 0, 0);
+}
+
+static void pred8x8_vertical_c(uint8_t *src, int stride){
+    int i;
+    const uint32_t a= ((uint32_t*)(src-stride))[0];
+    const uint32_t b= ((uint32_t*)(src-stride))[1];
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= a;
+        ((uint32_t*)(src+i*stride))[1]= b;
+    }
+}
+
+static void pred8x8_horizontal_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
+    }
+}
+
+static void pred8x8_128_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
+    }
+}
+
+static void pred8x8_127_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*127U;
+    }
+}
+static void pred8x8_129_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*129U;
+    }
+}
+
+static void pred8x8_left_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc2;
+
+    dc0=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride];
+        dc2+= src[-1+(i+4)*stride];
+    }
+    dc0= 0x01010101*((dc0 + 2)>>2);
+    dc2= 0x01010101*((dc2 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= dc0;
+    }
+    for(i=4; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= dc2;
+    }
+}
+
+static void pred8x8_top_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc1;
+
+    dc0=dc1=0;
+    for(i=0;i<4; i++){
+        dc0+= src[i-stride];
+        dc1+= src[4+i-stride];
+    }
+    dc0= 0x01010101*((dc0 + 2)>>2);
+    dc1= 0x01010101*((dc1 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc1;
+    }
+    for(i=4; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc1;
+    }
+}
+
+static void pred8x8_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc1, dc2, dc3;
+
+    dc0=dc1=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride] + src[i-stride];
+        dc1+= src[4+i-stride];
+        dc2+= src[-1+(i+4)*stride];
+    }
+    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
+    dc0= 0x01010101*((dc0 + 4)>>3);
+    dc1= 0x01010101*((dc1 + 2)>>2);
+    dc2= 0x01010101*((dc2 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc1;
+    }
+    for(i=4; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc2;
+        ((uint32_t*)(src+i*stride))[1]= dc3;
+    }
+}
+
+//the following 4 function should not be optimized!
+static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){
+    pred8x8_top_dc_c(src, stride);
+    pred4x4_dc_c(src, NULL, stride);
+}
+
+static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){
+    pred8x8_dc_c(src, stride);
+    pred4x4_top_dc_c(src, NULL, stride);
+}
+
+static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){
+    pred8x8_left_dc_c(src, stride);
+    pred4x4_128_dc_c(src + 4*stride    , NULL, stride);
+    pred4x4_128_dc_c(src + 4*stride + 4, NULL, stride);
+}
+
+static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){
+    pred8x8_left_dc_c(src, stride);
+    pred4x4_128_dc_c(src    , NULL, stride);
+    pred4x4_128_dc_c(src + 4, NULL, stride);
+}
+
+static void pred8x8_plane_c(uint8_t *src, int stride){
+  int j, k;
+  int a;
+  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+  const uint8_t * const src0 = src+3-stride;
+  const uint8_t *src1 = src+4*stride-1;
+  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
+  int H = src0[1] - src0[-1];
+  int V = src1[0] - src2[ 0];
+  for(k=2; k<=4; ++k) {
+    src1 += stride; src2 -= stride;
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  H = ( 17*H+16 ) >> 5;
+  V = ( 17*V+16 ) >> 5;
+
+  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
+  for(j=8; j>0; --j) {
+    int b = a;
+    a += V;
+    src[0] = cm[ (b    ) >> 5 ];
+    src[1] = cm[ (b+  H) >> 5 ];
+    src[2] = cm[ (b+2*H) >> 5 ];
+    src[3] = cm[ (b+3*H) >> 5 ];
+    src[4] = cm[ (b+4*H) >> 5 ];
+    src[5] = cm[ (b+5*H) >> 5 ];
+    src[6] = cm[ (b+6*H) >> 5 ];
+    src[7] = cm[ (b+7*H) >> 5 ];
+    src += stride;
+  }
+}
+
+#define SRC(x,y) src[(x)+(y)*stride]
+#define PL(y) \
+    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_LEFT \
+    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
+                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
+    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
+    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
+
+#define PT(x) \
+    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOP \
+    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
+                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
+    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
+    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
+                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
+
+#define PTR(x) \
+    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOPRIGHT \
+    int t8, t9, t10, t11, t12, t13, t14, t15; \
+    if(has_topright) { \
+        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
+        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
+    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
+
+#define PREDICT_8x8_LOAD_TOPLEFT \
+    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
+
+#define PREDICT_8x8_DC(v) \
+    int y; \
+    for( y = 0; y < 8; y++ ) { \
+        ((uint32_t*)src)[0] = \
+        ((uint32_t*)src)[1] = v; \
+        src += stride; \
+    }
+
+static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_DC(0x80808080);
+}
+static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOP;
+    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
+                         +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
+               ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
+    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
+#undef ROW
+}
+static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    int y;
+    PREDICT_8x8_LOAD_TOP;
+    src[0] = t0;
+    src[1] = t1;
+    src[2] = t2;
+    src[3] = t3;
+    src[4] = t4;
+    src[5] = t5;
+    src[6] = t6;
+    src[7] = t7;
+    for( y = 1; y < 8; y++ )
+        *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
+}
+static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
+    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
+    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
+    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
+    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
+}
+static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
+    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+
+}
+static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
+    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
+    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
+    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
+    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
+    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
+    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
+    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(7,0)= (t6 + t7 + 1) >> 1;
+}
+static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l6 + l7 + 1) >> 1;
+    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
+    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
+    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
+    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
+    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
+    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
+    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
+    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
+    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
+    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
+    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
+    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
+    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
+    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
+}
+static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    SRC(0,0)= (t0 + t1 + 1) >> 1;
+    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
+    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
+    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
+    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
+    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
+    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
+    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
+    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
+    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
+    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(7,6)= (t10 + t11 + 1) >> 1;
+    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
+}
+static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+    SRC(0,0)= (l0 + l1 + 1) >> 1;
+    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
+    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
+    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
+    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
+    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
+    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
+    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
+    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
+    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
+    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
+    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
+}
+#undef PREDICT_8x8_LOAD_LEFT
+#undef PREDICT_8x8_LOAD_TOP
+#undef PREDICT_8x8_LOAD_TOPLEFT
+#undef PREDICT_8x8_LOAD_TOPRIGHT
+#undef PREDICT_8x8_DC
+#undef PTR
+#undef PT
+#undef PL
+#undef SRC
+
+static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+    int i;
+    pix -= stride;
+    for(i=0; i<4; i++){
+        uint8_t v = pix[0];
+        pix[1*stride]= v += block[0];
+        pix[2*stride]= v += block[4];
+        pix[3*stride]= v += block[8];
+        pix[4*stride]= v +  block[12];
+        pix++;
+        block++;
+    }
+}
+
+static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+    int i;
+    for(i=0; i<4; i++){
+        uint8_t v = pix[-1];
+        pix[0]= v += block[0];
+        pix[1]= v += block[1];
+        pix[2]= v += block[2];
+        pix[3]= v +  block[3];
+        pix+= stride;
+        block+= 4;
+    }
+}
+
+static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+    int i;
+    pix -= stride;
+    for(i=0; i<8; i++){
+        uint8_t v = pix[0];
+        pix[1*stride]= v += block[0];
+        pix[2*stride]= v += block[8];
+        pix[3*stride]= v += block[16];
+        pix[4*stride]= v += block[24];
+        pix[5*stride]= v += block[32];
+        pix[6*stride]= v += block[40];
+        pix[7*stride]= v += block[48];
+        pix[8*stride]= v +  block[56];
+        pix++;
+        block++;
+    }
+}
+
+static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+    int i;
+    for(i=0; i<8; i++){
+        uint8_t v = pix[-1];
+        pix[0]= v += block[0];
+        pix[1]= v += block[1];
+        pix[2]= v += block[2];
+        pix[3]= v += block[3];
+        pix[4]= v += block[4];
+        pix[5]= v += block[5];
+        pix[6]= v += block[6];
+        pix[7]= v +  block[7];
+        pix+= stride;
+        block+= 8;
+    }
+}
+
+static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+    int i;
+    for(i=0; i<16; i++)
+        pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
+}
+
+static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+    int i;
+    for(i=0; i<16; i++)
+        pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
+}
+
+static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+    int i;
+    for(i=0; i<4; i++)
+        pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
+}
+
+static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+    int i;
+    for(i=0; i<4; i++)
+        pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
+}
-- 
cgit v1.2.3


From 15fb393be67c73f58d96c2c1bb187d037723835a Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:50 +0200
Subject: Move the functions in h264idct into a new file h264idct_template.c.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264idct.c          | 249 +-----------------------------------
 libavcodec/h264idct_template.c | 278 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 279 insertions(+), 248 deletions(-)
 create mode 100644 libavcodec/h264idct_template.c

(limited to 'libavcodec')

diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index 790f1883fd..d79777571a 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -25,251 +25,4 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#include "dsputil.h"
-
-static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
-    int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    block[0] += 1<<(shift-1);
-
-    for(i=0; i<4; i++){
-        const int z0=  block[i + block_stride*0]     +  block[i + block_stride*2];
-        const int z1=  block[i + block_stride*0]     -  block[i + block_stride*2];
-        const int z2= (block[i + block_stride*1]>>1) -  block[i + block_stride*3];
-        const int z3=  block[i + block_stride*1]     + (block[i + block_stride*3]>>1);
-
-        block[i + block_stride*0]= z0 + z3;
-        block[i + block_stride*1]= z1 + z2;
-        block[i + block_stride*2]= z1 - z2;
-        block[i + block_stride*3]= z0 - z3;
-    }
-
-    for(i=0; i<4; i++){
-        const int z0=  block[0 + block_stride*i]     +  block[2 + block_stride*i];
-        const int z1=  block[0 + block_stride*i]     -  block[2 + block_stride*i];
-        const int z2= (block[1 + block_stride*i]>>1) -  block[3 + block_stride*i];
-        const int z3=  block[1 + block_stride*i]     + (block[3 + block_stride*i]>>1);
-
-        dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
-        dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
-        dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
-        dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
-    }
-}
-
-void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
-    idct_internal(dst, block, stride, 4, 6, 1);
-}
-
-void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
-    idct_internal(dst, block, stride, 8, 3, 1);
-}
-
-void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
-    idct_internal(dst, block, stride, 8, 3, 0);
-}
-
-void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
-    int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    block[0] += 32;
-
-    for( i = 0; i < 8; i++ )
-    {
-        const int a0 =  block[i+0*8] + block[i+4*8];
-        const int a2 =  block[i+0*8] - block[i+4*8];
-        const int a4 = (block[i+2*8]>>1) - block[i+6*8];
-        const int a6 = (block[i+6*8]>>1) + block[i+2*8];
-
-        const int b0 = a0 + a6;
-        const int b2 = a2 + a4;
-        const int b4 = a2 - a4;
-        const int b6 = a0 - a6;
-
-        const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
-        const int a3 =  block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
-        const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
-        const int a7 =  block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
-
-        const int b1 = (a7>>2) + a1;
-        const int b3 =  a3 + (a5>>2);
-        const int b5 = (a3>>2) - a5;
-        const int b7 =  a7 - (a1>>2);
-
-        block[i+0*8] = b0 + b7;
-        block[i+7*8] = b0 - b7;
-        block[i+1*8] = b2 + b5;
-        block[i+6*8] = b2 - b5;
-        block[i+2*8] = b4 + b3;
-        block[i+5*8] = b4 - b3;
-        block[i+3*8] = b6 + b1;
-        block[i+4*8] = b6 - b1;
-    }
-    for( i = 0; i < 8; i++ )
-    {
-        const int a0 =  block[0+i*8] + block[4+i*8];
-        const int a2 =  block[0+i*8] - block[4+i*8];
-        const int a4 = (block[2+i*8]>>1) - block[6+i*8];
-        const int a6 = (block[6+i*8]>>1) + block[2+i*8];
-
-        const int b0 = a0 + a6;
-        const int b2 = a2 + a4;
-        const int b4 = a2 - a4;
-        const int b6 = a0 - a6;
-
-        const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
-        const int a3 =  block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
-        const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
-        const int a7 =  block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
-
-        const int b1 = (a7>>2) + a1;
-        const int b3 =  a3 + (a5>>2);
-        const int b5 = (a3>>2) - a5;
-        const int b7 =  a7 - (a1>>2);
-
-        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ];
-        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
-        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
-        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
-        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
-        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
-        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
-        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
-    }
-}
-
-// assumes all AC coefs are 0
-void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
-    int i, j;
-    int dc = (block[0] + 32) >> 6;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
-    for( j = 0; j < 4; j++ )
-    {
-        for( i = 0; i < 4; i++ )
-            dst[i] = cm[ dst[i] ];
-        dst += stride;
-    }
-}
-
-void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
-    int i, j;
-    int dc = (block[0] + 32) >> 6;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
-    for( j = 0; j < 8; j++ )
-    {
-        for( i = 0; i < 8; i++ )
-            dst[i] = cm[ dst[i] ];
-        dst += stride;
-    }
-}
-
-//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-static const uint8_t scan8[16 + 2*4]={
- 4+1*8, 5+1*8, 4+2*8, 5+2*8,
- 6+1*8, 7+1*8, 6+2*8, 7+2*8,
- 4+3*8, 5+3*8, 4+4*8, 5+4*8,
- 6+3*8, 7+3*8, 6+4*8, 7+4*8,
- 1+1*8, 2+1*8,
- 1+2*8, 2+2*8,
- 1+4*8, 2+4*8,
- 1+5*8, 2+5*8,
-};
-
-void ff_h264_idct_add16_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
-    int i;
-    for(i=0; i<16; i++){
-        int nnz = nnzc[ scan8[i] ];
-        if(nnz){
-            if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
-            else                      idct_internal        (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
-        }
-    }
-}
-
-void ff_h264_idct_add16intra_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
-    int i;
-    for(i=0; i<16; i++){
-        if(nnzc[ scan8[i] ]) idct_internal        (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
-        else if(block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
-    }
-}
-
-void ff_h264_idct8_add4_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
-    int i;
-    for(i=0; i<16; i+=4){
-        int nnz = nnzc[ scan8[i] ];
-        if(nnz){
-            if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_c(dst + block_offset[i], block + i*16, stride);
-            else                      ff_h264_idct8_add_c   (dst + block_offset[i], block + i*16, stride);
-        }
-    }
-}
-
-void ff_h264_idct_add8_c(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
-    int i;
-    for(i=16; i<16+8; i++){
-        if(nnzc[ scan8[i] ])
-            ff_h264_idct_add_c   (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
-        else if(block[i*16])
-            ff_h264_idct_dc_add_c(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
-    }
-}
-/**
- * IDCT transforms the 16 dc values and dequantizes them.
- * @param qp quantization parameter
- */
-void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul){
-#define stride 16
-    int i;
-    int temp[16];
-    static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
-
-    for(i=0; i<4; i++){
-        const int z0= input[4*i+0] + input[4*i+1];
-        const int z1= input[4*i+0] - input[4*i+1];
-        const int z2= input[4*i+2] - input[4*i+3];
-        const int z3= input[4*i+2] + input[4*i+3];
-
-        temp[4*i+0]= z0+z3;
-        temp[4*i+1]= z0-z3;
-        temp[4*i+2]= z1-z2;
-        temp[4*i+3]= z1+z2;
-    }
-
-    for(i=0; i<4; i++){
-        const int offset= x_offset[i];
-        const int z0= temp[4*0+i] + temp[4*2+i];
-        const int z1= temp[4*0+i] - temp[4*2+i];
-        const int z2= temp[4*1+i] - temp[4*3+i];
-        const int z3= temp[4*1+i] + temp[4*3+i];
-
-        output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8));
-        output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
-        output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
-        output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
-    }
-#undef stride
-}
-
-void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
-    const int stride= 16*2;
-    const int xStride= 16;
-    int a,b,c,d,e;
-
-    a= block[stride*0 + xStride*0];
-    b= block[stride*0 + xStride*1];
-    c= block[stride*1 + xStride*0];
-    d= block[stride*1 + xStride*1];
-
-    e= a-b;
-    a= a+b;
-    b= c-d;
-    c= c+d;
-
-    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
-    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
-    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
-    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
-}
+#include "h264idct_template.c"
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
new file mode 100644
index 0000000000..3069e41797
--- /dev/null
+++ b/libavcodec/h264idct_template.c
@@ -0,0 +1,278 @@
+/*
+ * H.264 IDCT
+ * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 IDCT.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "dsputil.h"
+
+#ifndef AVCODEC_H264IDCT_INTERNAL_H
+#define AVCODEC_H264IDCT_INTERNAL_H
+//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
+static const uint8_t scan8[16 + 2*4]={
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
+};
+#endif
+
+static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    block[0] += 1<<(shift-1);
+
+    for(i=0; i<4; i++){
+        const int z0=  block[i + block_stride*0]     +  block[i + block_stride*2];
+        const int z1=  block[i + block_stride*0]     -  block[i + block_stride*2];
+        const int z2= (block[i + block_stride*1]>>1) -  block[i + block_stride*3];
+        const int z3=  block[i + block_stride*1]     + (block[i + block_stride*3]>>1);
+
+        block[i + block_stride*0]= z0 + z3;
+        block[i + block_stride*1]= z1 + z2;
+        block[i + block_stride*2]= z1 - z2;
+        block[i + block_stride*3]= z0 - z3;
+    }
+
+    for(i=0; i<4; i++){
+        const int z0=  block[0 + block_stride*i]     +  block[2 + block_stride*i];
+        const int z1=  block[0 + block_stride*i]     -  block[2 + block_stride*i];
+        const int z2= (block[1 + block_stride*i]>>1) -  block[3 + block_stride*i];
+        const int z3=  block[1 + block_stride*i]     + (block[3 + block_stride*i]>>1);
+
+        dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
+        dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
+        dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
+        dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
+    }
+}
+
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    idct_internal(dst, block, stride, 4, 6, 1);
+}
+
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 1);
+}
+
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 0);
+}
+
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    block[0] += 32;
+
+    for( i = 0; i < 8; i++ )
+    {
+        const int a0 =  block[i+0*8] + block[i+4*8];
+        const int a2 =  block[i+0*8] - block[i+4*8];
+        const int a4 = (block[i+2*8]>>1) - block[i+6*8];
+        const int a6 = (block[i+6*8]>>1) + block[i+2*8];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
+        const int a3 =  block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
+        const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
+        const int a7 =  block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 =  a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 =  a7 - (a1>>2);
+
+        block[i+0*8] = b0 + b7;
+        block[i+7*8] = b0 - b7;
+        block[i+1*8] = b2 + b5;
+        block[i+6*8] = b2 - b5;
+        block[i+2*8] = b4 + b3;
+        block[i+5*8] = b4 - b3;
+        block[i+3*8] = b6 + b1;
+        block[i+4*8] = b6 - b1;
+    }
+    for( i = 0; i < 8; i++ )
+    {
+        const int a0 =  block[0+i*8] + block[4+i*8];
+        const int a2 =  block[0+i*8] - block[4+i*8];
+        const int a4 = (block[2+i*8]>>1) - block[6+i*8];
+        const int a6 = (block[6+i*8]>>1) + block[2+i*8];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
+        const int a3 =  block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
+        const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
+        const int a7 =  block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 =  a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 =  a7 - (a1>>2);
+
+        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ];
+        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
+        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
+        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
+        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
+        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
+        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
+        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
+    }
+}
+
+// assumes all AC coefs are 0
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i, j;
+    int dc = (block[0] + 32) >> 6;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
+    for( j = 0; j < 4; j++ )
+    {
+        for( i = 0; i < 4; i++ )
+            dst[i] = cm[ dst[i] ];
+        dst += stride;
+    }
+}
+
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i, j;
+    int dc = (block[0] + 32) >> 6;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
+    for( j = 0; j < 8; j++ )
+    {
+        for( i = 0; i < 8; i++ )
+            dst[i] = cm[ dst[i] ];
+        dst += stride;
+    }
+}
+
+void ff_h264_idct_add16_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    int i;
+    for(i=0; i<16; i++){
+        int nnz = nnzc[ scan8[i] ];
+        if(nnz){
+            if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
+            else                      idct_internal        (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
+        }
+    }
+}
+
+void ff_h264_idct_add16intra_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    int i;
+    for(i=0; i<16; i++){
+        if(nnzc[ scan8[i] ]) idct_internal        (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
+        else if(block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
+    }
+}
+
+void ff_h264_idct8_add4_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    int i;
+    for(i=0; i<16; i+=4){
+        int nnz = nnzc[ scan8[i] ];
+        if(nnz){
+            if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_c(dst + block_offset[i], block + i*16, stride);
+            else                      ff_h264_idct8_add_c   (dst + block_offset[i], block + i*16, stride);
+        }
+    }
+}
+
+void ff_h264_idct_add8_c(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    int i;
+    for(i=16; i<16+8; i++){
+        if(nnzc[ scan8[i] ])
+            ff_h264_idct_add_c   (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+        else if(block[i*16])
+            ff_h264_idct_dc_add_c(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+    }
+}
+/**
+ * IDCT transforms the 16 dc values and dequantizes them.
+ * @param qp quantization parameter
+ */
+void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul){
+#define stride 16
+    int i;
+    int temp[16];
+    static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
+
+    for(i=0; i<4; i++){
+        const int z0= input[4*i+0] + input[4*i+1];
+        const int z1= input[4*i+0] - input[4*i+1];
+        const int z2= input[4*i+2] - input[4*i+3];
+        const int z3= input[4*i+2] + input[4*i+3];
+
+        temp[4*i+0]= z0+z3;
+        temp[4*i+1]= z0-z3;
+        temp[4*i+2]= z1-z2;
+        temp[4*i+3]= z1+z2;
+    }
+
+    for(i=0; i<4; i++){
+        const int offset= x_offset[i];
+        const int z0= temp[4*0+i] + temp[4*2+i];
+        const int z1= temp[4*0+i] - temp[4*2+i];
+        const int z2= temp[4*1+i] - temp[4*3+i];
+        const int z3= temp[4*1+i] + temp[4*3+i];
+
+        output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8));
+        output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
+        output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
+        output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
+    }
+#undef stride
+}
+
+void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
+    const int stride= 16*2;
+    const int xStride= 16;
+    int a,b,c,d,e;
+
+    a= block[stride*0 + xStride*0];
+    b= block[stride*0 + xStride*1];
+    c= block[stride*1 + xStride*0];
+    d= block[stride*1 + xStride*1];
+
+    e= a-b;
+    a= a+b;
+    b= c-d;
+    c= c+d;
+
+    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
+    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
+    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
+    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
+}
-- 
cgit v1.2.3


From 325eefa2ca14b487120b164bc232e6c650fb20f3 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:51 +0200
Subject: Move some functions in dsputil.c into a new file dsputil_template.c.

The functions moved are used when decoding h264.
Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/dsputil.c          | 1228 +---------------------------------------
 libavcodec/dsputil_template.c | 1257 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1259 insertions(+), 1226 deletions(-)
 create mode 100644 libavcodec/dsputil_template.c

(limited to 'libavcodec')

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 32472b1a74..078e172d00 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -43,6 +43,8 @@
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
 
+#include "dsputil_template.c"
+
 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
 #define pb_7f (~0UL/255 * 0x7f)
 #define pb_80 (~0UL/255 * 0x80)
@@ -296,110 +298,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     return s;
 }
 
-/* draw the edges of width 'w' of an image of size width, height */
-//FIXME check that this is ok for mpeg4 interlaced
-static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides)
-{
-    uint8_t *ptr, *last_line;
-    int i;
-
-    /* left and right */
-    ptr = buf;
-    for(i=0;i<height;i++) {
-        memset(ptr - w, ptr[0], w);
-        memset(ptr + width, ptr[width-1], w);
-        ptr += wrap;
-    }
-
-    /* top and bottom + corners */
-    buf -= w;
-    last_line = buf + (height - 1) * wrap;
-    if (sides & EDGE_TOP)
-        for(i = 0; i < w; i++)
-            memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top
-    if (sides & EDGE_BOTTOM)
-        for (i = 0; i < w; i++)
-            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom
-}
-
-/**
- * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
- * @param buf destination buffer
- * @param src source buffer
- * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
- * @param block_w width of block
- * @param block_h height of block
- * @param src_x x coordinate of the top left sample of the block in the source buffer
- * @param src_y y coordinate of the top left sample of the block in the source buffer
- * @param w width of the source buffer
- * @param h height of the source buffer
- */
-void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
-                                    int src_x, int src_y, int w, int h){
-    int x, y;
-    int start_y, start_x, end_y, end_x;
-
-    if(src_y>= h){
-        src+= (h-1-src_y)*linesize;
-        src_y=h-1;
-    }else if(src_y<=-block_h){
-        src+= (1-block_h-src_y)*linesize;
-        src_y=1-block_h;
-    }
-    if(src_x>= w){
-        src+= (w-1-src_x);
-        src_x=w-1;
-    }else if(src_x<=-block_w){
-        src+= (1-block_w-src_x);
-        src_x=1-block_w;
-    }
-
-    start_y= FFMAX(0, -src_y);
-    start_x= FFMAX(0, -src_x);
-    end_y= FFMIN(block_h, h-src_y);
-    end_x= FFMIN(block_w, w-src_x);
-    assert(start_y < end_y && block_h);
-    assert(start_x < end_x && block_w);
-
-    w    = end_x - start_x;
-    src += start_y*linesize + start_x;
-    buf += start_x;
-
-    //top
-    for(y=0; y<start_y; y++){
-        memcpy(buf, src, w);
-        buf += linesize;
-    }
-
-    // copy existing part
-    for(; y<end_y; y++){
-        memcpy(buf, src, w);
-        src += linesize;
-        buf += linesize;
-    }
-
-    //bottom
-    src -= linesize;
-    for(; y<block_h; y++){
-        memcpy(buf, src, w);
-        buf += linesize;
-    }
-
-    buf -= block_h * linesize + start_x;
-    while (block_h--){
-       //left
-        for(x=0; x<start_x; x++){
-            buf[x] = buf[start_x];
-        }
-
-       //right
-        for(x=end_x; x<block_w; x++){
-            buf[x] = buf[end_x - 1];
-        }
-        buf += linesize;
-    }
-}
-
 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
 {
     int i;
@@ -591,36 +489,6 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
     }
 }
 
-static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
-{
-    int i;
-    for(i=0;i<8;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels[4] += block[4];
-        pixels[5] += block[5];
-        pixels[6] += block[6];
-        pixels[7] += block[7];
-        pixels += line_size;
-        block += 8;
-    }
-}
-
-static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
-{
-    int i;
-    for(i=0;i<4;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels += line_size;
-        block += 4;
-    }
-}
-
 static int sum_abs_dctelem_c(DCTELEM *block)
 {
     int sum=0, i;
@@ -665,539 +533,9 @@ static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align
     }
 }
 
-#if 0
-
-#define PIXOP2(OPNAME, OP) \
-static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-    int i;\
-    for(i=0; i<h; i++){\
-        OP(*((uint64_t*)block), AV_RN64(pixels));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-\
-static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-    int i;\
-    for(i=0; i<h; i++){\
-        const uint64_t a= AV_RN64(pixels  );\
-        const uint64_t b= AV_RN64(pixels+1);\
-        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-\
-static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-    int i;\
-    for(i=0; i<h; i++){\
-        const uint64_t a= AV_RN64(pixels  );\
-        const uint64_t b= AV_RN64(pixels+1);\
-        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-\
-static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-    int i;\
-    for(i=0; i<h; i++){\
-        const uint64_t a= AV_RN64(pixels          );\
-        const uint64_t b= AV_RN64(pixels+line_size);\
-        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-\
-static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-    int i;\
-    for(i=0; i<h; i++){\
-        const uint64_t a= AV_RN64(pixels          );\
-        const uint64_t b= AV_RN64(pixels+line_size);\
-        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-\
-static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-        int i;\
-        const uint64_t a= AV_RN64(pixels  );\
-        const uint64_t b= AV_RN64(pixels+1);\
-        uint64_t l0=  (a&0x0303030303030303ULL)\
-                    + (b&0x0303030303030303ULL)\
-                    + 0x0202020202020202ULL;\
-        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
-                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
-        uint64_t l1,h1;\
-\
-        pixels+=line_size;\
-        for(i=0; i<h; i+=2){\
-            uint64_t a= AV_RN64(pixels  );\
-            uint64_t b= AV_RN64(pixels+1);\
-            l1=  (a&0x0303030303030303ULL)\
-               + (b&0x0303030303030303ULL);\
-            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
-              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
-            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
-            pixels+=line_size;\
-            block +=line_size;\
-            a= AV_RN64(pixels  );\
-            b= AV_RN64(pixels+1);\
-            l0=  (a&0x0303030303030303ULL)\
-               + (b&0x0303030303030303ULL)\
-               + 0x0202020202020202ULL;\
-            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
-              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
-            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
-            pixels+=line_size;\
-            block +=line_size;\
-        }\
-}\
-\
-static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-        int i;\
-        const uint64_t a= AV_RN64(pixels  );\
-        const uint64_t b= AV_RN64(pixels+1);\
-        uint64_t l0=  (a&0x0303030303030303ULL)\
-                    + (b&0x0303030303030303ULL)\
-                    + 0x0101010101010101ULL;\
-        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
-                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
-        uint64_t l1,h1;\
-\
-        pixels+=line_size;\
-        for(i=0; i<h; i+=2){\
-            uint64_t a= AV_RN64(pixels  );\
-            uint64_t b= AV_RN64(pixels+1);\
-            l1=  (a&0x0303030303030303ULL)\
-               + (b&0x0303030303030303ULL);\
-            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
-              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
-            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
-            pixels+=line_size;\
-            block +=line_size;\
-            a= AV_RN64(pixels  );\
-            b= AV_RN64(pixels+1);\
-            l0=  (a&0x0303030303030303ULL)\
-               + (b&0x0303030303030303ULL)\
-               + 0x0101010101010101ULL;\
-            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
-              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
-            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
-            pixels+=line_size;\
-            block +=line_size;\
-        }\
-}\
-\
-CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
-
-#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
-#else // 64 bit variant
-
-#define PIXOP2(OPNAME, OP) \
-static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
-        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
-        pixels+=line_size;\
-        block +=line_size;\
-    }\
-}\
-static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
-}\
-\
-static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
-                                                int src_stride1, int src_stride2, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN32(&src1[i*src_stride1  ]);\
-        b= AV_RN32(&src2[i*src_stride2  ]);\
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
-        a= AV_RN32(&src1[i*src_stride1+4]);\
-        b= AV_RN32(&src2[i*src_stride2+4]);\
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
-    }\
-}\
-\
-static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
-                                                int src_stride1, int src_stride2, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN32(&src1[i*src_stride1  ]);\
-        b= AV_RN32(&src2[i*src_stride2  ]);\
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
-        a= AV_RN32(&src1[i*src_stride1+4]);\
-        b= AV_RN32(&src2[i*src_stride2+4]);\
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
-    }\
-}\
-\
-static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
-                                                int src_stride1, int src_stride2, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN32(&src1[i*src_stride1  ]);\
-        b= AV_RN32(&src2[i*src_stride2  ]);\
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
-    }\
-}\
-\
-static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
-                                                int src_stride1, int src_stride2, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN16(&src1[i*src_stride1  ]);\
-        b= AV_RN16(&src2[i*src_stride2  ]);\
-        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
-    }\
-}\
-\
-static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
-                                                int src_stride1, int src_stride2, int h){\
-    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
-    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
-}\
-\
-static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
-                                                int src_stride1, int src_stride2, int h){\
-    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
-    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
-}\
-\
-static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
-                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        uint32_t a, b, c, d, l0, l1, h0, h1;\
-        a= AV_RN32(&src1[i*src_stride1]);\
-        b= AV_RN32(&src2[i*src_stride2]);\
-        c= AV_RN32(&src3[i*src_stride3]);\
-        d= AV_RN32(&src4[i*src_stride4]);\
-        l0=  (a&0x03030303UL)\
-           + (b&0x03030303UL)\
-           + 0x02020202UL;\
-        h0= ((a&0xFCFCFCFCUL)>>2)\
-          + ((b&0xFCFCFCFCUL)>>2);\
-        l1=  (c&0x03030303UL)\
-           + (d&0x03030303UL);\
-        h1= ((c&0xFCFCFCFCUL)>>2)\
-          + ((d&0xFCFCFCFCUL)>>2);\
-        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-        a= AV_RN32(&src1[i*src_stride1+4]);\
-        b= AV_RN32(&src2[i*src_stride2+4]);\
-        c= AV_RN32(&src3[i*src_stride3+4]);\
-        d= AV_RN32(&src4[i*src_stride4+4]);\
-        l0=  (a&0x03030303UL)\
-           + (b&0x03030303UL)\
-           + 0x02020202UL;\
-        h0= ((a&0xFCFCFCFCUL)>>2)\
-          + ((b&0xFCFCFCFCUL)>>2);\
-        l1=  (c&0x03030303UL)\
-           + (d&0x03030303UL);\
-        h1= ((c&0xFCFCFCFCUL)>>2)\
-          + ((d&0xFCFCFCFCUL)>>2);\
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-    }\
-}\
-\
-static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
-}\
-\
-static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
-                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
-    int i;\
-    for(i=0; i<h; i++){\
-        uint32_t a, b, c, d, l0, l1, h0, h1;\
-        a= AV_RN32(&src1[i*src_stride1]);\
-        b= AV_RN32(&src2[i*src_stride2]);\
-        c= AV_RN32(&src3[i*src_stride3]);\
-        d= AV_RN32(&src4[i*src_stride4]);\
-        l0=  (a&0x03030303UL)\
-           + (b&0x03030303UL)\
-           + 0x01010101UL;\
-        h0= ((a&0xFCFCFCFCUL)>>2)\
-          + ((b&0xFCFCFCFCUL)>>2);\
-        l1=  (c&0x03030303UL)\
-           + (d&0x03030303UL);\
-        h1= ((c&0xFCFCFCFCUL)>>2)\
-          + ((d&0xFCFCFCFCUL)>>2);\
-        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-        a= AV_RN32(&src1[i*src_stride1+4]);\
-        b= AV_RN32(&src2[i*src_stride2+4]);\
-        c= AV_RN32(&src3[i*src_stride3+4]);\
-        d= AV_RN32(&src4[i*src_stride4+4]);\
-        l0=  (a&0x03030303UL)\
-           + (b&0x03030303UL)\
-           + 0x01010101UL;\
-        h0= ((a&0xFCFCFCFCUL)>>2)\
-          + ((b&0xFCFCFCFCUL)>>2);\
-        l1=  (c&0x03030303UL)\
-           + (d&0x03030303UL);\
-        h1= ((c&0xFCFCFCFCUL)>>2)\
-          + ((d&0xFCFCFCFCUL)>>2);\
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-    }\
-}\
-static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
-                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
-    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
-    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
-}\
-static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
-                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
-    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
-    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
-}\
-\
-static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-        int i, a0, b0, a1, b1;\
-        a0= pixels[0];\
-        b0= pixels[1] + 2;\
-        a0 += b0;\
-        b0 += pixels[2];\
-\
-        pixels+=line_size;\
-        for(i=0; i<h; i+=2){\
-            a1= pixels[0];\
-            b1= pixels[1];\
-            a1 += b1;\
-            b1 += pixels[2];\
-\
-            block[0]= (a1+a0)>>2; /* FIXME non put */\
-            block[1]= (b1+b0)>>2;\
-\
-            pixels+=line_size;\
-            block +=line_size;\
-\
-            a0= pixels[0];\
-            b0= pixels[1] + 2;\
-            a0 += b0;\
-            b0 += pixels[2];\
-\
-            block[0]= (a1+a0)>>2;\
-            block[1]= (b1+b0)>>2;\
-            pixels+=line_size;\
-            block +=line_size;\
-        }\
-}\
-\
-static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-        int i;\
-        const uint32_t a= AV_RN32(pixels  );\
-        const uint32_t b= AV_RN32(pixels+1);\
-        uint32_t l0=  (a&0x03030303UL)\
-                    + (b&0x03030303UL)\
-                    + 0x02020202UL;\
-        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
-                   + ((b&0xFCFCFCFCUL)>>2);\
-        uint32_t l1,h1;\
-\
-        pixels+=line_size;\
-        for(i=0; i<h; i+=2){\
-            uint32_t a= AV_RN32(pixels  );\
-            uint32_t b= AV_RN32(pixels+1);\
-            l1=  (a&0x03030303UL)\
-               + (b&0x03030303UL);\
-            h1= ((a&0xFCFCFCFCUL)>>2)\
-              + ((b&0xFCFCFCFCUL)>>2);\
-            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-            pixels+=line_size;\
-            block +=line_size;\
-            a= AV_RN32(pixels  );\
-            b= AV_RN32(pixels+1);\
-            l0=  (a&0x03030303UL)\
-               + (b&0x03030303UL)\
-               + 0x02020202UL;\
-            h0= ((a&0xFCFCFCFCUL)>>2)\
-              + ((b&0xFCFCFCFCUL)>>2);\
-            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-            pixels+=line_size;\
-            block +=line_size;\
-        }\
-}\
-\
-static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-    int j;\
-    for(j=0; j<2; j++){\
-        int i;\
-        const uint32_t a= AV_RN32(pixels  );\
-        const uint32_t b= AV_RN32(pixels+1);\
-        uint32_t l0=  (a&0x03030303UL)\
-                    + (b&0x03030303UL)\
-                    + 0x02020202UL;\
-        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
-                   + ((b&0xFCFCFCFCUL)>>2);\
-        uint32_t l1,h1;\
-\
-        pixels+=line_size;\
-        for(i=0; i<h; i+=2){\
-            uint32_t a= AV_RN32(pixels  );\
-            uint32_t b= AV_RN32(pixels+1);\
-            l1=  (a&0x03030303UL)\
-               + (b&0x03030303UL);\
-            h1= ((a&0xFCFCFCFCUL)>>2)\
-              + ((b&0xFCFCFCFCUL)>>2);\
-            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-            pixels+=line_size;\
-            block +=line_size;\
-            a= AV_RN32(pixels  );\
-            b= AV_RN32(pixels+1);\
-            l0=  (a&0x03030303UL)\
-               + (b&0x03030303UL)\
-               + 0x02020202UL;\
-            h0= ((a&0xFCFCFCFCUL)>>2)\
-              + ((b&0xFCFCFCFCUL)>>2);\
-            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-            pixels+=line_size;\
-            block +=line_size;\
-        }\
-        pixels+=4-line_size*(h+1);\
-        block +=4-line_size*h;\
-    }\
-}\
-\
-static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
-{\
-    int j;\
-    for(j=0; j<2; j++){\
-        int i;\
-        const uint32_t a= AV_RN32(pixels  );\
-        const uint32_t b= AV_RN32(pixels+1);\
-        uint32_t l0=  (a&0x03030303UL)\
-                    + (b&0x03030303UL)\
-                    + 0x01010101UL;\
-        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
-                   + ((b&0xFCFCFCFCUL)>>2);\
-        uint32_t l1,h1;\
-\
-        pixels+=line_size;\
-        for(i=0; i<h; i+=2){\
-            uint32_t a= AV_RN32(pixels  );\
-            uint32_t b= AV_RN32(pixels+1);\
-            l1=  (a&0x03030303UL)\
-               + (b&0x03030303UL);\
-            h1= ((a&0xFCFCFCFCUL)>>2)\
-              + ((b&0xFCFCFCFCUL)>>2);\
-            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-            pixels+=line_size;\
-            block +=line_size;\
-            a= AV_RN32(pixels  );\
-            b= AV_RN32(pixels+1);\
-            l0=  (a&0x03030303UL)\
-               + (b&0x03030303UL)\
-               + 0x01010101UL;\
-            h0= ((a&0xFCFCFCFCUL)>>2)\
-              + ((b&0xFCFCFCFCUL)>>2);\
-            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
-            pixels+=line_size;\
-            block +=line_size;\
-        }\
-        pixels+=4-line_size*(h+1);\
-        block +=4-line_size*h;\
-    }\
-}\
-\
-CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
-av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
-
-#define op_avg(a, b) a = rnd_avg32(a, b)
-#endif
-#define op_put(a, b) a = b
-
-PIXOP2(avg, op_avg)
-PIXOP2(put, op_put)
-#undef op_avg
-#undef op_put
-
-#define put_no_rnd_pixels8_c  put_pixels8_c
-#define put_no_rnd_pixels16_c put_pixels16_c
-
 #define avg2(a,b) ((a+b+1)>>1)
 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
 
-static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
-    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
-}
-
-static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
-    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
-}
-
 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
 {
     const int A=(16-x16)*(16-y16);
@@ -1494,115 +832,6 @@ static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src
     void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
 #endif
 
-#define H264_CHROMA_MC(OPNAME, OP)\
-static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
-    const int A=(8-x)*(8-y);\
-    const int B=(  x)*(8-y);\
-    const int C=(8-x)*(  y);\
-    const int D=(  x)*(  y);\
-    int i;\
-    \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
-\
-    if(D){\
-        for(i=0; i<h; i++){\
-            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
-            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
-            dst+= stride;\
-            src+= stride;\
-        }\
-    }else{\
-        const int E= B+C;\
-        const int step= C ? stride : 1;\
-        for(i=0; i<h; i++){\
-            OP(dst[0], (A*src[0] + E*src[step+0]));\
-            OP(dst[1], (A*src[1] + E*src[step+1]));\
-            dst+= stride;\
-            src+= stride;\
-        }\
-    }\
-}\
-\
-static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
-    const int A=(8-x)*(8-y);\
-    const int B=(  x)*(8-y);\
-    const int C=(8-x)*(  y);\
-    const int D=(  x)*(  y);\
-    int i;\
-    \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
-\
-    if(D){\
-        for(i=0; i<h; i++){\
-            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
-            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
-            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
-            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
-            dst+= stride;\
-            src+= stride;\
-        }\
-    }else{\
-        const int E= B+C;\
-        const int step= C ? stride : 1;\
-        for(i=0; i<h; i++){\
-            OP(dst[0], (A*src[0] + E*src[step+0]));\
-            OP(dst[1], (A*src[1] + E*src[step+1]));\
-            OP(dst[2], (A*src[2] + E*src[step+2]));\
-            OP(dst[3], (A*src[3] + E*src[step+3]));\
-            dst+= stride;\
-            src+= stride;\
-        }\
-    }\
-}\
-\
-static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
-    const int A=(8-x)*(8-y);\
-    const int B=(  x)*(8-y);\
-    const int C=(8-x)*(  y);\
-    const int D=(  x)*(  y);\
-    int i;\
-    \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
-\
-    if(D){\
-        for(i=0; i<h; i++){\
-            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
-            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
-            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
-            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
-            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
-            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
-            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
-            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
-            dst+= stride;\
-            src+= stride;\
-        }\
-    }else{\
-        const int E= B+C;\
-        const int step= C ? stride : 1;\
-        for(i=0; i<h; i++){\
-            OP(dst[0], (A*src[0] + E*src[step+0]));\
-            OP(dst[1], (A*src[1] + E*src[step+1]));\
-            OP(dst[2], (A*src[2] + E*src[step+2]));\
-            OP(dst[3], (A*src[3] + E*src[step+3]));\
-            OP(dst[4], (A*src[4] + E*src[step+4]));\
-            OP(dst[5], (A*src[5] + E*src[step+5]));\
-            OP(dst[6], (A*src[6] + E*src[step+6]));\
-            OP(dst[7], (A*src[7] + E*src[step+7]));\
-            dst+= stride;\
-            src+= stride;\
-        }\
-    }\
-}
-
-#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
-#define op_put(a, b) a = (((b) + 32)>>6)
-
-H264_CHROMA_MC(put_       , op_put)
-H264_CHROMA_MC(avg_       , op_avg)
-#undef op_avg
-#undef op_put
-
 #define QPEL_MC(r, OPNAME, RND, OP) \
 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
@@ -2100,433 +1329,6 @@ QPEL_MC(0, avg_       , _       , op_avg)
 #define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
 
-#define H264_LOWPASS(OPNAME, OP, OP2) \
-static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    const int h=2;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    for(i=0; i<h; i++)\
-    {\
-        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
-        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
-        dst+=dstStride;\
-        src+=srcStride;\
-    }\
-}\
-\
-static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    const int w=2;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    for(i=0; i<w; i++)\
-    {\
-        const int srcB= src[-2*srcStride];\
-        const int srcA= src[-1*srcStride];\
-        const int src0= src[0 *srcStride];\
-        const int src1= src[1 *srcStride];\
-        const int src2= src[2 *srcStride];\
-        const int src3= src[3 *srcStride];\
-        const int src4= src[4 *srcStride];\
-        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
-        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
-        dst++;\
-        src++;\
-    }\
-}\
-\
-static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
-    const int h=2;\
-    const int w=2;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    src -= 2*srcStride;\
-    for(i=0; i<h+5; i++)\
-    {\
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
-        tmp+=tmpStride;\
-        src+=srcStride;\
-    }\
-    tmp -= tmpStride*(h+5-2);\
-    for(i=0; i<w; i++)\
-    {\
-        const int tmpB= tmp[-2*tmpStride];\
-        const int tmpA= tmp[-1*tmpStride];\
-        const int tmp0= tmp[0 *tmpStride];\
-        const int tmp1= tmp[1 *tmpStride];\
-        const int tmp2= tmp[2 *tmpStride];\
-        const int tmp3= tmp[3 *tmpStride];\
-        const int tmp4= tmp[4 *tmpStride];\
-        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
-        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
-        dst++;\
-        tmp++;\
-    }\
-}\
-static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    const int h=4;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    for(i=0; i<h; i++)\
-    {\
-        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
-        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
-        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
-        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
-        dst+=dstStride;\
-        src+=srcStride;\
-    }\
-}\
-\
-static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    const int w=4;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    for(i=0; i<w; i++)\
-    {\
-        const int srcB= src[-2*srcStride];\
-        const int srcA= src[-1*srcStride];\
-        const int src0= src[0 *srcStride];\
-        const int src1= src[1 *srcStride];\
-        const int src2= src[2 *srcStride];\
-        const int src3= src[3 *srcStride];\
-        const int src4= src[4 *srcStride];\
-        const int src5= src[5 *srcStride];\
-        const int src6= src[6 *srcStride];\
-        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
-        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
-        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
-        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
-        dst++;\
-        src++;\
-    }\
-}\
-\
-static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
-    const int h=4;\
-    const int w=4;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    src -= 2*srcStride;\
-    for(i=0; i<h+5; i++)\
-    {\
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
-        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
-        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
-        tmp+=tmpStride;\
-        src+=srcStride;\
-    }\
-    tmp -= tmpStride*(h+5-2);\
-    for(i=0; i<w; i++)\
-    {\
-        const int tmpB= tmp[-2*tmpStride];\
-        const int tmpA= tmp[-1*tmpStride];\
-        const int tmp0= tmp[0 *tmpStride];\
-        const int tmp1= tmp[1 *tmpStride];\
-        const int tmp2= tmp[2 *tmpStride];\
-        const int tmp3= tmp[3 *tmpStride];\
-        const int tmp4= tmp[4 *tmpStride];\
-        const int tmp5= tmp[5 *tmpStride];\
-        const int tmp6= tmp[6 *tmpStride];\
-        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
-        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
-        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
-        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
-        dst++;\
-        tmp++;\
-    }\
-}\
-\
-static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    const int h=8;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    for(i=0; i<h; i++)\
-    {\
-        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
-        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
-        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
-        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
-        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
-        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
-        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
-        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
-        dst+=dstStride;\
-        src+=srcStride;\
-    }\
-}\
-\
-static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    const int w=8;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    for(i=0; i<w; i++)\
-    {\
-        const int srcB= src[-2*srcStride];\
-        const int srcA= src[-1*srcStride];\
-        const int src0= src[0 *srcStride];\
-        const int src1= src[1 *srcStride];\
-        const int src2= src[2 *srcStride];\
-        const int src3= src[3 *srcStride];\
-        const int src4= src[4 *srcStride];\
-        const int src5= src[5 *srcStride];\
-        const int src6= src[6 *srcStride];\
-        const int src7= src[7 *srcStride];\
-        const int src8= src[8 *srcStride];\
-        const int src9= src[9 *srcStride];\
-        const int src10=src[10*srcStride];\
-        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
-        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
-        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
-        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
-        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
-        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
-        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
-        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
-        dst++;\
-        src++;\
-    }\
-}\
-\
-static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
-    const int h=8;\
-    const int w=8;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
-    int i;\
-    src -= 2*srcStride;\
-    for(i=0; i<h+5; i++)\
-    {\
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
-        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
-        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
-        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
-        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
-        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
-        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
-        tmp+=tmpStride;\
-        src+=srcStride;\
-    }\
-    tmp -= tmpStride*(h+5-2);\
-    for(i=0; i<w; i++)\
-    {\
-        const int tmpB= tmp[-2*tmpStride];\
-        const int tmpA= tmp[-1*tmpStride];\
-        const int tmp0= tmp[0 *tmpStride];\
-        const int tmp1= tmp[1 *tmpStride];\
-        const int tmp2= tmp[2 *tmpStride];\
-        const int tmp3= tmp[3 *tmpStride];\
-        const int tmp4= tmp[4 *tmpStride];\
-        const int tmp5= tmp[5 *tmpStride];\
-        const int tmp6= tmp[6 *tmpStride];\
-        const int tmp7= tmp[7 *tmpStride];\
-        const int tmp8= tmp[8 *tmpStride];\
-        const int tmp9= tmp[9 *tmpStride];\
-        const int tmp10=tmp[10*tmpStride];\
-        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
-        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
-        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
-        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
-        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
-        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
-        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
-        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
-        dst++;\
-        tmp++;\
-    }\
-}\
-\
-static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
-    src += 8*srcStride;\
-    dst += 8*dstStride;\
-    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
-}\
-\
-static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
-    src += 8*srcStride;\
-    dst += 8*dstStride;\
-    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
-}\
-\
-static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
-    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
-    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
-    src += 8*srcStride;\
-    dst += 8*dstStride;\
-    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
-    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
-}\
-
-#define H264_MC(OPNAME, SIZE) \
-static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
-    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t half[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
-    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t half[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t half[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t half[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfV[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfV[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
-}\
-
-#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
-//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
-#define op_put(a, b)  a = cm[((b) + 16)>>5]
-#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
-#define op2_put(a, b)  a = cm[((b) + 512)>>10]
-
-H264_LOWPASS(put_       , op_put, op2_put)
-H264_LOWPASS(avg_       , op_avg, op2_avg)
-H264_MC(put_, 2)
-H264_MC(put_, 4)
-H264_MC(put_, 8)
-H264_MC(put_, 16)
-H264_MC(avg_, 4)
-H264_MC(avg_, 8)
-H264_MC(avg_, 16)
-
-#undef op_avg
-#undef op_put
-#undef op2_avg
-#undef op2_put
-
-#define put_h264_qpel8_mc00_c  ff_put_pixels8x8_c
-#define avg_h264_qpel8_mc00_c  ff_avg_pixels8x8_c
-#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
-#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
-
 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int i;
@@ -2545,19 +1347,6 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
     }
 }
 
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
-    put_pixels8_c(dst, src, stride, 8);
-}
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
-    avg_pixels8_c(dst, src, stride, 8);
-}
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
-    put_pixels16_c(dst, src, stride, 16);
-}
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
-    avg_pixels16_c(dst, src, stride, 16);
-}
-
 #if CONFIG_RV40_DECODER
 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
     put_pixels16_xy2_c(dst, src, stride, 16);
@@ -3115,19 +1904,6 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
     }
 }
 
-static void clear_block_c(DCTELEM *block)
-{
-    memset(block, 0, sizeof(DCTELEM)*64);
-}
-
-/**
- * memset(blocks, 0, sizeof(DCTELEM)*6*64)
- */
-static void clear_blocks_c(DCTELEM *blocks)
-{
-    memset(blocks, 0, sizeof(DCTELEM)*6*64);
-}
-
 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
     long i;
     for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
new file mode 100644
index 0000000000..866817ce2e
--- /dev/null
+++ b/libavcodec/dsputil_template.c
@@ -0,0 +1,1257 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DSP utils
+ */
+
+#include "dsputil.h"
+
+/* draw the edges of width 'w' of an image of size width, height */
+//FIXME check that this is ok for mpeg4 interlaced
+static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides)
+{
+    uint8_t *ptr, *last_line;
+    int i;
+
+    /* left and right */
+    ptr = buf;
+    for(i=0;i<height;i++) {
+        memset(ptr - w, ptr[0], w);
+        memset(ptr + width, ptr[width-1], w);
+        ptr += wrap;
+    }
+
+    /* top and bottom + corners */
+    buf -= w;
+    last_line = buf + (height - 1) * wrap;
+    if (sides & EDGE_TOP)
+        for(i = 0; i < w; i++)
+            memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top
+    if (sides & EDGE_BOTTOM)
+        for (i = 0; i < w; i++)
+            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom
+}
+
+/**
+ * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
+                                    int src_x, int src_y, int w, int h){
+    int x, y;
+    int start_y, start_x, end_y, end_x;
+
+    if(src_y>= h){
+        src+= (h-1-src_y)*linesize;
+        src_y=h-1;
+    }else if(src_y<=-block_h){
+        src+= (1-block_h-src_y)*linesize;
+        src_y=1-block_h;
+    }
+    if(src_x>= w){
+        src+= (w-1-src_x);
+        src_x=w-1;
+    }else if(src_x<=-block_w){
+        src+= (1-block_w-src_x);
+        src_x=1-block_w;
+    }
+
+    start_y= FFMAX(0, -src_y);
+    start_x= FFMAX(0, -src_x);
+    end_y= FFMIN(block_h, h-src_y);
+    end_x= FFMIN(block_w, w-src_x);
+    assert(start_y < end_y && block_h);
+    assert(start_x < end_x && block_w);
+
+    w    = end_x - start_x;
+    src += start_y*linesize + start_x;
+    buf += start_x;
+
+    //top
+    for(y=0; y<start_y; y++){
+        memcpy(buf, src, w);
+        buf += linesize;
+    }
+
+    // copy existing part
+    for(; y<end_y; y++){
+        memcpy(buf, src, w);
+        src += linesize;
+        buf += linesize;
+    }
+
+    //bottom
+    src -= linesize;
+    for(; y<block_h; y++){
+        memcpy(buf, src, w);
+        buf += linesize;
+    }
+
+    buf -= block_h * linesize + start_x;
+    while (block_h--){
+       //left
+        for(x=0; x<start_x; x++){
+            buf[x] = buf[start_x];
+        }
+
+       //right
+        for(x=end_x; x<block_w; x++){
+            buf[x] = buf[end_x - 1];
+        }
+        buf += linesize;
+    }
+}
+
+static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<8;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels[4] += block[4];
+        pixels[5] += block[5];
+        pixels[6] += block[6];
+        pixels[7] += block[7];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<4;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels += line_size;
+        block += 4;
+    }
+}
+
+#if 0
+
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint64_t*)block), AV_RN64(pixels));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0202020202020202ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0202020202020202ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0101010101010101ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0101010101010101ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
+
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
+#else // 64 bit variant
+
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN16(&src1[i*src_stride1  ]);\
+        b= AV_RN16(&src2[i*src_stride2  ]);\
+        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i, a0, b0, a1, b1;\
+        a0= pixels[0];\
+        b0= pixels[1] + 2;\
+        a0 += b0;\
+        b0 += pixels[2];\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            a1= pixels[0];\
+            b1= pixels[1];\
+            a1 += b1;\
+            b1 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2; /* FIXME non put */\
+            block[1]= (b1+b0)>>2;\
+\
+            pixels+=line_size;\
+            block +=line_size;\
+\
+            a0= pixels[0];\
+            b0= pixels[1] + 2;\
+            a0 += b0;\
+            b0 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2;\
+            block[1]= (b1+b0)>>2;\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x01010101UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x01010101UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
+av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
+
+#define op_avg(a, b) a = rnd_avg32(a, b)
+#endif
+
+#define op_put(a, b) a = b
+
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+#undef op_avg
+#undef op_put
+
+#define put_no_rnd_pixels8_c  put_pixels8_c
+#define put_no_rnd_pixels16_c put_pixels16_c
+
+static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
+}
+
+static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
+}
+
+#define H264_CHROMA_MC(OPNAME, OP)\
+static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
+            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
+            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
+            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            OP(dst[4], (A*src[4] + E*src[step+4]));\
+            OP(dst[5], (A*src[5] + E*src[step+5]));\
+            OP(dst[6], (A*src[6] + E*src[step+6]));\
+            OP(dst[7], (A*src[7] + E*src[step+7]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}
+
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+
+H264_CHROMA_MC(put_       , op_put)
+H264_CHROMA_MC(avg_       , op_avg)
+#undef op_avg
+#undef op_put
+
+#define H264_LOWPASS(OPNAME, OP, OP2) \
+static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=2;\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=4;\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        const int src7= src[7 *srcStride];\
+        const int src8= src[8 *srcStride];\
+        const int src9= src[9 *srcStride];\
+        const int src10=src[10*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=8;\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
+        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
+        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
+        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
+        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        const int tmp7= tmp[7 *tmpStride];\
+        const int tmp8= tmp[8 *tmpStride];\
+        const int tmp9= tmp[9 *tmpStride];\
+        const int tmp10=tmp[10*tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
+        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
+        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
+        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+}\
+
+#define H264_MC(OPNAME, SIZE) \
+static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+
+#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
+#define op_put(a, b)  a = cm[((b) + 16)>>5]
+#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
+#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+
+H264_LOWPASS(put_       , op_put, op2_put)
+H264_LOWPASS(avg_       , op_avg, op2_avg)
+H264_MC(put_, 2)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+
+#define put_h264_qpel8_mc00_c  ff_put_pixels8x8_c
+#define avg_h264_qpel8_mc00_c  ff_avg_pixels8x8_c
+#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
+#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
+
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels16_c(dst, src, stride, 16);
+}
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels16_c(dst, src, stride, 16);
+}
+
+static void clear_block_c(DCTELEM *block)
+{
+    memset(block, 0, sizeof(DCTELEM)*64);
+}
+
+/**
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)
+ */
+static void clear_blocks_c(DCTELEM *blocks)
+{
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+}
-- 
cgit v1.2.3


From de3e760720ecc0c755cc2216ef9bf3dba39450a4 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Mon, 9 May 2011 11:18:37 -0400
Subject: Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264dsp_template.c  | 119 +++++----
 libavcodec/h264pred_template.c | 585 +++++++++++++++++++++++------------------
 2 files changed, 406 insertions(+), 298 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index 2fe09ff56b..7190f4d9ca 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -25,12 +25,19 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
-#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
+#define BIT_DEPTH 8
+#define pixel uint8_t
+#define av_clip_pixel av_clip_uint8
+#define FUNCC(a) a ## _c
+
+#define op_scale1(x)  block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
+#define op_scale2(x)  dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
 #define H264_WEIGHT(W,H) \
-static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *_block, int stride, int log2_denom, int weight, int offset){ \
     int y; \
-    offset <<= log2_denom; \
+    pixel *block = (pixel*)_block; \
+    stride /= sizeof(pixel); \
+    offset <<= (log2_denom + (BIT_DEPTH-8)); \
     if(log2_denom) offset += 1<<(log2_denom-1); \
     for(y=0; y<H; y++, block += stride){ \
         op_scale1(0); \
@@ -54,8 +61,11 @@ static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride,
         op_scale1(15); \
     } \
 } \
-static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \
     int y; \
+    pixel *dst = (pixel*)_dst; \
+    pixel *src = (pixel*)_src; \
+    stride /= sizeof(pixel); \
     offset = ((offset + 1) | 1) << log2_denom; \
     for(y=0; y<H; y++, dst += stride, src += stride){ \
         op_scale2(0); \
@@ -95,11 +105,17 @@ H264_WEIGHT(2,2)
 #undef op_scale2
 #undef H264_WEIGHT
 
-static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
+    pixel *pix = (pixel*)_pix;
     int i, d;
+    xstride /= sizeof(pixel);
+    ystride /= sizeof(pixel);
+    alpha <<= BIT_DEPTH - 8;
+    beta  <<= BIT_DEPTH - 8;
     for( i = 0; i < 4; i++ ) {
-        if( tc0[i] < 0 ) {
+        const int tc_orig = tc0[i] << (BIT_DEPTH - 8);
+        if( tc_orig < 0 ) {
             pix += inner_iters*ystride;
             continue;
         }
@@ -115,44 +131,49 @@ static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, in
                 FFABS( p1 - p0 ) < beta &&
                 FFABS( q1 - q0 ) < beta ) {
 
-                int tc = tc0[i];
+                int tc = tc_orig;
                 int i_delta;
 
                 if( FFABS( p2 - p0 ) < beta ) {
-                    if(tc0[i])
-                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
+                    if(tc_orig)
+                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig );
                     tc++;
                 }
                 if( FFABS( q2 - q0 ) < beta ) {
-                    if(tc0[i])
-                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
+                    if(tc_orig)
+                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig );
                     tc++;
                 }
 
                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
-                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
+                pix[-xstride] = av_clip_pixel( p0 + i_delta );    /* p0' */
+                pix[0]        = av_clip_pixel( q0 - i_delta );    /* q0' */
             }
             pix += ystride;
         }
     }
 }
-static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_luma_c(pix, stride, 1, 4, alpha, beta, tc0);
+    FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0);
 }
-static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_luma_c(pix, 1, stride, 4, alpha, beta, tc0);
+    FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
 }
-static void h264_h_loop_filter_luma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_luma_c(pix, 1, stride, 2, alpha, beta, tc0);
+    FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
 {
+    pixel *pix = (pixel*)_pix;
     int d;
+    xstride /= sizeof(pixel);
+    ystride /= sizeof(pixel);
+    alpha <<= BIT_DEPTH - 8;
+    beta  <<= BIT_DEPTH - 8;
     for( d = 0; d < 4 * inner_iters; d++ ) {
         const int p2 = pix[-3*xstride];
         const int p1 = pix[-2*xstride];
@@ -198,24 +219,29 @@ static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *p
         pix += ystride;
     }
 }
-static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_luma_intra_c(pix, stride, 1, 4, alpha, beta);
+    FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta);
 }
-static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_luma_intra_c(pix, 1, stride, 4, alpha, beta);
+    FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
 }
-static void h264_h_loop_filter_luma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_luma_intra_c(pix, 1, stride, 2, alpha, beta);
+    FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
 }
 
-static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
+    pixel *pix = (pixel*)_pix;
     int i, d;
+    xstride /= sizeof(pixel);
+    ystride /= sizeof(pixel);
+    alpha <<= BIT_DEPTH - 8;
+    beta  <<= BIT_DEPTH - 8;
     for( i = 0; i < 4; i++ ) {
-        const int tc = tc0[i];
+        const int tc = ((tc0[i] - 1) << (BIT_DEPTH - 8)) + 1;
         if( tc <= 0 ) {
             pix += inner_iters*ystride;
             continue;
@@ -232,29 +258,34 @@ static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix,
 
                 int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 
-                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
-                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
+                pix[-xstride] = av_clip_pixel( p0 + delta );    /* p0' */
+                pix[0]        = av_clip_pixel( q0 - delta );    /* q0' */
             }
             pix += ystride;
         }
     }
 }
-static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_chroma_c(pix, stride, 1, 2, alpha, beta, tc0);
+    FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0);
 }
-static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_chroma_c(pix, 1, stride, 2, alpha, beta, tc0);
+    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
 }
-static void h264_h_loop_filter_chroma_mbaff_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
-    h264_loop_filter_chroma_c(pix, 1, stride, 1, alpha, beta, tc0);
+    FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
 {
+    pixel *pix = (pixel*)_pix;
     int d;
+    xstride /= sizeof(pixel);
+    ystride /= sizeof(pixel);
+    alpha <<= BIT_DEPTH - 8;
+    beta  <<= BIT_DEPTH - 8;
     for( d = 0; d < 4 * inner_iters; d++ ) {
         const int p0 = pix[-1*xstride];
         const int p1 = pix[-2*xstride];
@@ -271,15 +302,15 @@ static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t
         pix += ystride;
     }
 }
-static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_chroma_intra_c(pix, stride, 1, 2, alpha, beta);
+    FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta);
 }
-static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_chroma_intra_c(pix, 1, stride, 2, alpha, beta);
+    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
 }
-static void h264_h_loop_filter_chroma_mbaff_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
 {
-    h264_loop_filter_chroma_intra_c(pix, 1, stride, 1, alpha, beta);
+    FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
 }
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 7856315a03..4bd26e2892 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -28,68 +28,98 @@
 #include "mathops.h"
 #include "dsputil.h"
 
-static void pred4x4_vertical_c(uint8_t *src, const uint8_t *topright, int stride){
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    ((uint32_t*)(src+0*stride))[0]= a;
-    ((uint32_t*)(src+1*stride))[0]= a;
-    ((uint32_t*)(src+2*stride))[0]= a;
-    ((uint32_t*)(src+3*stride))[0]= a;
-}
-
-static void pred4x4_horizontal_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
-    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
-    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
-    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
-}
-
-static void pred4x4_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+#define BIT_DEPTH 8
+
+#define pixel uint8_t
+#define pixel4 uint32_t
+#define dctcoef DCTELEM
+
+#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+#define CLIP(a) cm[a]
+#define FUNC(a) a
+#define FUNCC(a) a ## _c
+#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
+#define AV_WN4P  AV_WN32
+#define AV_WN4PA AV_WN32A
+
+static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+    const pixel4 a= ((pixel4*)(src-stride))[0];
+    ((pixel4*)(src+0*stride))[0]= a;
+    ((pixel4*)(src+1*stride))[0]= a;
+    ((pixel4*)(src+2*stride))[0]= a;
+    ((pixel4*)(src+3*stride))[0]= a;
+}
+
+static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+    ((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]);
+    ((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]);
+    ((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]);
+    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]);
+}
+
+static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                    + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
 
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+    ((pixel4*)(src+0*stride))[0]=
+    ((pixel4*)(src+1*stride))[0]=
+    ((pixel4*)(src+2*stride))[0]=
+    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
 }
 
-static void pred4x4_left_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
 
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+    ((pixel4*)(src+0*stride))[0]=
+    ((pixel4*)(src+1*stride))[0]=
+    ((pixel4*)(src+2*stride))[0]=
+    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
 }
 
-static void pred4x4_top_dc_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
 
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+    ((pixel4*)(src+0*stride))[0]=
+    ((pixel4*)(src+1*stride))[0]=
+    ((pixel4*)(src+2*stride))[0]=
+    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
 }
 
-static void pred4x4_128_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
+static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+    ((pixel4*)(src+0*stride))[0]=
+    ((pixel4*)(src+1*stride))[0]=
+    ((pixel4*)(src+2*stride))[0]=
+    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
 }
 
-static void pred4x4_127_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= 127U*0x01010101U;
+static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+    ((pixel4*)(src+0*stride))[0]=
+    ((pixel4*)(src+1*stride))[0]=
+    ((pixel4*)(src+2*stride))[0]=
+    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
 }
 
-static void pred4x4_129_dc_c(uint8_t *src, const uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]=
-    ((uint32_t*)(src+1*stride))[0]=
-    ((uint32_t*)(src+2*stride))[0]=
-    ((uint32_t*)(src+3*stride))[0]= 129U*0x01010101U;
+static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+    ((pixel4*)(src+0*stride))[0]=
+    ((pixel4*)(src+1*stride))[0]=
+    ((pixel4*)(src+2*stride))[0]=
+    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
 }
 
 
@@ -117,7 +147,9 @@ static void pred4x4_129_dc_c(uint8_t *src, const uint8_t *topright, int stride){
     const int av_unused t2= src[ 2-1*stride];\
     const int av_unused t3= src[ 3-1*stride];\
 
-static void pred4x4_down_right_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     const int lt= src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
@@ -140,7 +172,10 @@ static void pred4x4_down_right_c(uint8_t *src, const uint8_t *topright, int stri
     src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
 }
 
-static void pred4x4_down_left_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
+    pixel *src = (pixel*)_src;
+    const pixel *topright = (const pixel*)_topright;
+    int stride = _stride/sizeof(pixel);
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 //    LOAD_LEFT_EDGE
@@ -163,7 +198,9 @@ static void pred4x4_down_left_c(uint8_t *src, const uint8_t *topright, int strid
     src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
 }
 
-static void pred4x4_vertical_right_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     const int lt= src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
@@ -186,7 +223,10 @@ static void pred4x4_vertical_right_c(uint8_t *src, const uint8_t *topright, int
     src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
 }
 
-static void pred4x4_vertical_left_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
+    pixel *src = (pixel*)_src;
+    const pixel *topright = (const pixel*)_topright;
+    int stride = _stride/sizeof(pixel);
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 
@@ -208,7 +248,9 @@ static void pred4x4_vertical_left_c(uint8_t *src, const uint8_t *topright, int s
     src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
 }
 
-static void pred4x4_horizontal_up_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     LOAD_LEFT_EDGE
 
     src[0+0*stride]=(l0 + l1 + 1)>>1;
@@ -229,7 +271,9 @@ static void pred4x4_horizontal_up_c(uint8_t *src, const uint8_t *topright, int s
     src[3+3*stride]=l3;
 }
 
-static void pred4x4_horizontal_down_c(uint8_t *src, const uint8_t *topright, int stride){
+static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     const int lt= src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
@@ -252,34 +296,50 @@ static void pred4x4_horizontal_down_c(uint8_t *src, const uint8_t *topright, int
     src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
 }
 
-static void pred16x16_vertical_c(uint8_t *src, int stride){
+static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
     int i;
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    const uint32_t b= ((uint32_t*)(src-stride))[1];
-    const uint32_t c= ((uint32_t*)(src-stride))[2];
-    const uint32_t d= ((uint32_t*)(src-stride))[3];
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+    const pixel4 a = ((pixel4*)(src-stride))[0];
+    const pixel4 b = ((pixel4*)(src-stride))[1];
+    const pixel4 c = ((pixel4*)(src-stride))[2];
+    const pixel4 d = ((pixel4*)(src-stride))[3];
 
     for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]= a;
-        ((uint32_t*)(src+i*stride))[1]= b;
-        ((uint32_t*)(src+i*stride))[2]= c;
-        ((uint32_t*)(src+i*stride))[3]= d;
+        ((pixel4*)(src+i*stride))[0] = a;
+        ((pixel4*)(src+i*stride))[1] = b;
+        ((pixel4*)(src+i*stride))[2] = c;
+        ((pixel4*)(src+i*stride))[3] = d;
     }
 }
 
-static void pred16x16_horizontal_c(uint8_t *src, int stride){
+static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
     int i;
+    pixel *src = (pixel*)_src;
+    stride /= sizeof(pixel);
 
     for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
+        ((pixel4*)(src+i*stride))[0] =
+        ((pixel4*)(src+i*stride))[1] =
+        ((pixel4*)(src+i*stride))[2] =
+        ((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]);
     }
 }
 
-static void pred16x16_dc_c(uint8_t *src, int stride){
+#define PREDICT_16x16_DC(v)\
+    for(i=0; i<16; i++){\
+        AV_WN4P(src+ 0, v);\
+        AV_WN4P(src+ 4, v);\
+        AV_WN4P(src+ 8, v);\
+        AV_WN4P(src+12, v);\
+        src += stride;\
+    }
+
+static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
     int i, dc=0;
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride /= sizeof(pixel);
 
     for(i=0;i<16; i++){
         dc+= src[-1+i*stride];
@@ -289,89 +349,59 @@ static void pred16x16_dc_c(uint8_t *src, int stride){
         dc+= src[i-stride];
     }
 
-    dc= 0x01010101*((dc + 16)>>5);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
+    dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
+    PREDICT_16x16_DC(dcsplat);
 }
 
-static void pred16x16_left_dc_c(uint8_t *src, int stride){
+static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
     int i, dc=0;
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride /= sizeof(pixel);
 
     for(i=0;i<16; i++){
         dc+= src[-1+i*stride];
     }
 
-    dc= 0x01010101*((dc + 8)>>4);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
+    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
+    PREDICT_16x16_DC(dcsplat);
 }
 
-static void pred16x16_top_dc_c(uint8_t *src, int stride){
+static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
     int i, dc=0;
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride /= sizeof(pixel);
 
     for(i=0;i<16; i++){
         dc+= src[i-stride];
     }
-    dc= 0x01010101*((dc + 8)>>4);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
-}
 
-static void pred16x16_128_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
-    }
+    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
+    PREDICT_16x16_DC(dcsplat);
 }
 
-static void pred16x16_127_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*127U;
-    }
+#define PRED16x16_X(n, v) \
+static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\
+    int i;\
+    pixel *src = (pixel*)_src;\
+    stride /= sizeof(pixel);\
+    PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
 }
 
-static void pred16x16_129_dc_c(uint8_t *src, int stride){
-    int i;
+PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1);
+PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0);
+PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1);
 
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*129U;
-    }
-}
-
-static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){
+static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){
   int i, j, k;
   int a;
-  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-  const uint8_t * const src0 = src+7-stride;
-  const uint8_t *src1 = src+8*stride-1;
-  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
+  INIT_CLIP
+  pixel *src = (pixel*)_src;
+  int stride = _stride/sizeof(pixel);
+  const pixel * const src0 = src +7-stride;
+  const pixel *       src1 = src +8*stride-1;
+  const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
   int H = src0[1] - src0[-1];
   int V = src1[0] - src2[ 0];
   for(k=2; k<=8; ++k) {
@@ -398,113 +428,115 @@ static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int
     int b = a;
     a += V;
     for(i=-16; i<0; i+=4) {
-      src[16+i] = cm[ (b    ) >> 5 ];
-      src[17+i] = cm[ (b+  H) >> 5 ];
-      src[18+i] = cm[ (b+2*H) >> 5 ];
-      src[19+i] = cm[ (b+3*H) >> 5 ];
+      src[16+i] = CLIP((b    ) >> 5);
+      src[17+i] = CLIP((b+  H) >> 5);
+      src[18+i] = CLIP((b+2*H) >> 5);
+      src[19+i] = CLIP((b+3*H) >> 5);
       b += 4*H;
     }
     src += stride;
   }
 }
 
-static void pred16x16_plane_c(uint8_t *src, int stride){
-    pred16x16_plane_compat_c(src, stride, 0, 0);
+static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
+    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
 }
 
-static void pred8x8_vertical_c(uint8_t *src, int stride){
+static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
     int i;
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    const uint32_t b= ((uint32_t*)(src-stride))[1];
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+    const pixel4 a= ((pixel4*)(src-stride))[0];
+    const pixel4 b= ((pixel4*)(src-stride))[1];
 
     for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= a;
-        ((uint32_t*)(src+i*stride))[1]= b;
+        ((pixel4*)(src+i*stride))[0]= a;
+        ((pixel4*)(src+i*stride))[1]= b;
     }
 }
 
-static void pred8x8_horizontal_c(uint8_t *src, int stride){
+static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
     int i;
+    pixel *src = (pixel*)_src;
+    stride /= sizeof(pixel);
 
     for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
+        ((pixel4*)(src+i*stride))[0]=
+        ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]);
     }
 }
 
-static void pred8x8_128_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
-    }
+#define PRED8x8_X(n, v)\
+static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
+    int i;\
+    pixel *src = (pixel*)_src;\
+    stride /= sizeof(pixel);\
+    for(i=0; i<8; i++){\
+        ((pixel4*)(src+i*stride))[0]=\
+        ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\
+    }\
 }
 
-static void pred8x8_127_dc_c(uint8_t *src, int stride){
-    int i;
+PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
+PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
+PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);
 
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*127U;
-    }
-}
-static void pred8x8_129_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*129U;
-    }
-}
-
-static void pred8x8_left_dc_c(uint8_t *src, int stride){
+static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
     int i;
     int dc0, dc2;
+    pixel4 dc0splat, dc2splat;
+    pixel *src = (pixel*)_src;
+    stride /= sizeof(pixel);
 
     dc0=dc2=0;
     for(i=0;i<4; i++){
         dc0+= src[-1+i*stride];
         dc2+= src[-1+(i+4)*stride];
     }
-    dc0= 0x01010101*((dc0 + 2)>>2);
-    dc2= 0x01010101*((dc2 + 2)>>2);
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
+    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
 
     for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= dc0;
+        ((pixel4*)(src+i*stride))[0]=
+        ((pixel4*)(src+i*stride))[1]= dc0splat;
     }
     for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= dc2;
+        ((pixel4*)(src+i*stride))[0]=
+        ((pixel4*)(src+i*stride))[1]= dc2splat;
     }
 }
 
-static void pred8x8_top_dc_c(uint8_t *src, int stride){
+static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
     int i;
     int dc0, dc1;
+    pixel4 dc0splat, dc1splat;
+    pixel *src = (pixel*)_src;
+    stride /= sizeof(pixel);
 
     dc0=dc1=0;
     for(i=0;i<4; i++){
         dc0+= src[i-stride];
         dc1+= src[4+i-stride];
     }
-    dc0= 0x01010101*((dc0 + 2)>>2);
-    dc1= 0x01010101*((dc1 + 2)>>2);
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
 
     for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
+        ((pixel4*)(src+i*stride))[0]= dc0splat;
+        ((pixel4*)(src+i*stride))[1]= dc1splat;
     }
     for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
+        ((pixel4*)(src+i*stride))[0]= dc0splat;
+        ((pixel4*)(src+i*stride))[1]= dc1splat;
     }
 }
 
-static void pred8x8_dc_c(uint8_t *src, int stride){
+static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
     int i;
-    int dc0, dc1, dc2, dc3;
+    int dc0, dc1, dc2;
+    pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
+    pixel *src = (pixel*)_src;
+    stride /= sizeof(pixel);
 
     dc0=dc1=dc2=0;
     for(i=0;i<4; i++){
@@ -512,51 +544,53 @@ static void pred8x8_dc_c(uint8_t *src, int stride){
         dc1+= src[4+i-stride];
         dc2+= src[-1+(i+4)*stride];
     }
-    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
-    dc0= 0x01010101*((dc0 + 4)>>3);
-    dc1= 0x01010101*((dc1 + 2)>>2);
-    dc2= 0x01010101*((dc2 + 2)>>2);
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
+    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
 
     for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
+        ((pixel4*)(src+i*stride))[0]= dc0splat;
+        ((pixel4*)(src+i*stride))[1]= dc1splat;
     }
     for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc2;
-        ((uint32_t*)(src+i*stride))[1]= dc3;
+        ((pixel4*)(src+i*stride))[0]= dc2splat;
+        ((pixel4*)(src+i*stride))[1]= dc3splat;
     }
 }
 
 //the following 4 function should not be optimized!
-static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){
-    pred8x8_top_dc_c(src, stride);
-    pred4x4_dc_c(src, NULL, stride);
+static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
+    FUNCC(pred8x8_top_dc)(src, stride);
+    FUNCC(pred4x4_dc)(src, NULL, stride);
 }
 
-static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){
-    pred8x8_dc_c(src, stride);
-    pred4x4_top_dc_c(src, NULL, stride);
+static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
+    FUNCC(pred8x8_dc)(src, stride);
+    FUNCC(pred4x4_top_dc)(src, NULL, stride);
 }
 
-static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){
-    pred8x8_left_dc_c(src, stride);
-    pred4x4_128_dc_c(src + 4*stride    , NULL, stride);
-    pred4x4_128_dc_c(src + 4*stride + 4, NULL, stride);
+static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
+    FUNCC(pred8x8_left_dc)(src, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
 }
 
-static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){
-    pred8x8_left_dc_c(src, stride);
-    pred4x4_128_dc_c(src    , NULL, stride);
-    pred4x4_128_dc_c(src + 4, NULL, stride);
+static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
+    FUNCC(pred8x8_left_dc)(src, stride);
+    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
 }
 
-static void pred8x8_plane_c(uint8_t *src, int stride){
+static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
   int j, k;
   int a;
-  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-  const uint8_t * const src0 = src+3-stride;
-  const uint8_t *src1 = src+4*stride-1;
-  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
+  INIT_CLIP
+  pixel *src = (pixel*)_src;
+  int stride = _stride/sizeof(pixel);
+  const pixel * const src0 = src +3-stride;
+  const pixel *       src1 = src +4*stride-1;
+  const pixel *       src2 = src1-2*stride;    // == src+2*stride-1;
   int H = src0[1] - src0[-1];
   int V = src1[0] - src2[ 0];
   for(k=2; k<=4; ++k) {
@@ -571,14 +605,14 @@ static void pred8x8_plane_c(uint8_t *src, int stride){
   for(j=8; j>0; --j) {
     int b = a;
     a += V;
-    src[0] = cm[ (b    ) >> 5 ];
-    src[1] = cm[ (b+  H) >> 5 ];
-    src[2] = cm[ (b+2*H) >> 5 ];
-    src[3] = cm[ (b+3*H) >> 5 ];
-    src[4] = cm[ (b+4*H) >> 5 ];
-    src[5] = cm[ (b+5*H) >> 5 ];
-    src[6] = cm[ (b+6*H) >> 5 ];
-    src[7] = cm[ (b+7*H) >> 5 ];
+    src[0] = CLIP((b    ) >> 5);
+    src[1] = CLIP((b+  H) >> 5);
+    src[2] = CLIP((b+2*H) >> 5);
+    src[3] = CLIP((b+3*H) >> 5);
+    src[4] = CLIP((b+4*H) >> 5);
+    src[5] = CLIP((b+5*H) >> 5);
+    src[6] = CLIP((b+6*H) >> 5);
+    src[7] = CLIP((b+7*H) >> 5);
     src += stride;
   }
 }
@@ -616,46 +650,64 @@ static void pred8x8_plane_c(uint8_t *src, int stride){
 #define PREDICT_8x8_DC(v) \
     int y; \
     for( y = 0; y < 8; y++ ) { \
-        ((uint32_t*)src)[0] = \
-        ((uint32_t*)src)[1] = v; \
+        ((pixel4*)src)[0] = \
+        ((pixel4*)src)[1] = v; \
         src += stride; \
     }
 
-static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
-    PREDICT_8x8_DC(0x80808080);
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+
+    PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
 }
-static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+
     PREDICT_8x8_LOAD_LEFT;
-    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
+    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
     PREDICT_8x8_DC(dc);
 }
-static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+
     PREDICT_8x8_LOAD_TOP;
-    const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
+    const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
     PREDICT_8x8_DC(dc);
 }
-static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOP;
-    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
-                         +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
+    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
+                                     +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
     PREDICT_8x8_DC(dc);
 }
-static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+
     PREDICT_8x8_LOAD_LEFT;
-#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
-               ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
+#define ROW(y) ((pixel4*)(src+y*stride))[0] =\
+               ((pixel4*)(src+y*stride))[1] = PIXEL_SPLAT_X4(l##y)
     ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
 #undef ROW
 }
-static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
     int y;
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
+
     PREDICT_8x8_LOAD_TOP;
     src[0] = t0;
     src[1] = t1;
@@ -665,11 +717,15 @@ static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright,
     src[5] = t5;
     src[6] = t6;
     src[7] = t7;
-    for( y = 1; y < 8; y++ )
-        *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
+    for( y = 1; y < 8; y++ ) {
+        ((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0];
+        ((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1];
+    }
 }
-static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_TOPRIGHT;
     SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
@@ -688,8 +744,10 @@ static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright
     SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
     SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
 }
-static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -708,10 +766,11 @@ static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_toprigh
     SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
     SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
     SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
-
 }
-static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -738,8 +797,10 @@ static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_top
     SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
     SRC(7,0)= (t6 + t7 + 1) >> 1;
 }
-static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -766,8 +827,10 @@ static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_to
     SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
     SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
 }
-static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_TOPRIGHT;
     SRC(0,0)= (t0 + t1 + 1) >> 1;
@@ -793,8 +856,10 @@ static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topr
     SRC(7,6)= (t10 + t11 + 1) >> 1;
     SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
 }
-static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
 {
+    pixel *src = (pixel*)_src;
+    int stride = _stride/sizeof(pixel);
     PREDICT_8x8_LOAD_LEFT;
     SRC(0,0)= (l0 + l1 + 1) >> 1;
     SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
@@ -825,11 +890,14 @@ static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topr
 #undef PL
 #undef SRC
 
-static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
     int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride /= sizeof(pixel);
     pix -= stride;
     for(i=0; i<4; i++){
-        uint8_t v = pix[0];
+        pixel v = pix[0];
         pix[1*stride]= v += block[0];
         pix[2*stride]= v += block[4];
         pix[3*stride]= v += block[8];
@@ -839,10 +907,13 @@ static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int strid
     }
 }
 
-static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
     int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride /= sizeof(pixel);
     for(i=0; i<4; i++){
-        uint8_t v = pix[-1];
+        pixel v = pix[-1];
         pix[0]= v += block[0];
         pix[1]= v += block[1];
         pix[2]= v += block[2];
@@ -852,11 +923,14 @@ static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int str
     }
 }
 
-static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
     int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride /= sizeof(pixel);
     pix -= stride;
     for(i=0; i<8; i++){
-        uint8_t v = pix[0];
+        pixel v = pix[0];
         pix[1*stride]= v += block[0];
         pix[2*stride]= v += block[8];
         pix[3*stride]= v += block[16];
@@ -870,10 +944,13 @@ static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stri
     }
 }
 
-static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
     int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride /= sizeof(pixel);
     for(i=0; i<8; i++){
-        uint8_t v = pix[-1];
+        pixel v = pix[-1];
         pix[0]= v += block[0];
         pix[1]= v += block[1];
         pix[2]= v += block[2];
@@ -887,26 +964,26 @@ static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int st
     }
 }
 
-static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
     int i;
     for(i=0; i<16; i++)
-        pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
 
-static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
     int i;
     for(i=0; i<16; i++)
-        pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
 
-static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
     int i;
     for(i=0; i<4; i++)
-        pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
 
-static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
     int i;
     for(i=0; i<4; i++)
-        pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
-- 
cgit v1.2.3


From 5d4bd9cc8958205601f7a9088dd54ed3714d40da Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:54 +0200
Subject: Template h264idct_template.c with respect to pixel size, etc.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264idct_template.c | 109 ++++++++++++++++++++++++-----------------
 1 file changed, 64 insertions(+), 45 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index 3069e41797..ed1f18c9b6 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -42,9 +42,18 @@ static const uint8_t scan8[16 + 2*4]={
 };
 #endif
 
-static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+#define pixel  uint8_t
+#define dctcoef DCTELEM
+#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+#define CLIP(a) cm[a]
+#define FUNCC(a) a ## _c
+
+static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    INIT_CLIP
+    pixel *dst = (pixel*)_dst;
+    dctcoef *block = (dctcoef*)_block;
+    stride /= sizeof(pixel);
 
     block[0] += 1<<(shift-1);
 
@@ -66,28 +75,31 @@ static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int str
         const int z2= (block[1 + block_stride*i]>>1) -  block[3 + block_stride*i];
         const int z3=  block[1 + block_stride*i]     + (block[3 + block_stride*i]>>1);
 
-        dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
-        dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
-        dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
-        dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
+        dst[i + 0*stride]= CLIP(add*dst[i + 0*stride] + ((z0 + z3) >> shift));
+        dst[i + 1*stride]= CLIP(add*dst[i + 1*stride] + ((z1 + z2) >> shift));
+        dst[i + 2*stride]= CLIP(add*dst[i + 2*stride] + ((z1 - z2) >> shift));
+        dst[i + 3*stride]= CLIP(add*dst[i + 3*stride] + ((z0 - z3) >> shift));
     }
 }
 
-void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
-    idct_internal(dst, block, stride, 4, 6, 1);
+void FUNCC(ff_h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride){
+    FUNCC(idct_internal)(dst, block, stride, 4, 6, 1);
 }
 
-void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
-    idct_internal(dst, block, stride, 8, 3, 1);
+void FUNCC(ff_h264_lowres_idct_add)(uint8_t *dst, int stride, DCTELEM *block){
+    FUNCC(idct_internal)(dst, block, stride, 8, 3, 1);
 }
 
-void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
-    idct_internal(dst, block, stride, 8, 3, 0);
+void FUNCC(ff_h264_lowres_idct_put)(uint8_t *dst, int stride, DCTELEM *block){
+    FUNCC(idct_internal)(dst, block, stride, 8, 3, 0);
 }
 
-void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
+void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    INIT_CLIP
+    pixel *dst = (pixel*)_dst;
+    dctcoef *block = (dctcoef*)_block;
+    stride /= sizeof(pixel);
 
     block[0] += 32;
 
@@ -144,90 +156,96 @@ void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
         const int b5 = (a3>>2) - a5;
         const int b7 =  a7 - (a1>>2);
 
-        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ];
-        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
-        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
-        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
-        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
-        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
-        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
-        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
+        dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) );
+        dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) );
+        dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) );
+        dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) );
+        dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) );
+        dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) );
+        dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) );
+        dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) );
     }
 }
 
 // assumes all AC coefs are 0
-void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
     int i, j;
-    int dc = (block[0] + 32) >> 6;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
+    int dc = (((dctcoef*)block)[0] + 32) >> 6;
+    INIT_CLIP
+    pixel *dst = (pixel*)_dst;
+    stride /= sizeof(pixel);
     for( j = 0; j < 4; j++ )
     {
         for( i = 0; i < 4; i++ )
-            dst[i] = cm[ dst[i] ];
+            dst[i] = CLIP( dst[i] + dc );
         dst += stride;
     }
 }
 
-void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
     int i, j;
-    int dc = (block[0] + 32) >> 6;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
+    int dc = (((dctcoef*)block)[0] + 32) >> 6;
+    INIT_CLIP
+    pixel *dst = (pixel*)_dst;
+    stride /= sizeof(pixel);
     for( j = 0; j < 8; j++ )
     {
         for( i = 0; i < 8; i++ )
-            dst[i] = cm[ dst[i] ];
+            dst[i] = CLIP( dst[i] + dc );
         dst += stride;
     }
 }
 
-void ff_h264_idct_add16_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
     int i;
     for(i=0; i<16; i++){
         int nnz = nnzc[ scan8[i] ];
         if(nnz){
-            if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
-            else                      idct_internal        (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
+            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
+            else                                  FUNCC(idct_internal      )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
         }
     }
 }
 
-void ff_h264_idct_add16intra_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
     int i;
     for(i=0; i<16; i++){
-        if(nnzc[ scan8[i] ]) idct_internal        (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
-        else if(block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
+        if(nnzc[ scan8[i] ])             FUNCC(idct_internal      )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
+        else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
     }
 }
 
-void ff_h264_idct8_add4_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
     int i;
     for(i=0; i<16; i+=4){
         int nnz = nnzc[ scan8[i] ];
         if(nnz){
-            if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_c(dst + block_offset[i], block + i*16, stride);
-            else                      ff_h264_idct8_add_c   (dst + block_offset[i], block + i*16, stride);
+            if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
+            else                                  FUNCC(ff_h264_idct8_add   )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
         }
     }
 }
 
-void ff_h264_idct_add8_c(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
     int i;
     for(i=16; i<16+8; i++){
         if(nnzc[ scan8[i] ])
-            ff_h264_idct_add_c   (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
-        else if(block[i*16])
-            ff_h264_idct_dc_add_c(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+            FUNCC(ff_h264_idct_add   )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
+        else if(((dctcoef*)block)[i*16])
+            FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
     }
 }
 /**
  * IDCT transforms the 16 dc values and dequantizes them.
  * @param qp quantization parameter
  */
-void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul){
+void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int qmul){
 #define stride 16
     int i;
     int temp[16];
     static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
+    dctcoef *input = (dctcoef*)_input;
+    dctcoef *output = (dctcoef*)_output;
 
     for(i=0; i<4; i++){
         const int z0= input[4*i+0] + input[4*i+1];
@@ -256,10 +274,11 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul){
 #undef stride
 }
 
-void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
+void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
     const int stride= 16*2;
     const int xStride= 16;
     int a,b,c,d,e;
+    dctcoef *block = (dctcoef*)_block;
 
     a= block[stride*0 + xStride*0];
     b= block[stride*0 + xStride*1];
-- 
cgit v1.2.3


From 87ce8b495f0e73625bc583e01c07bcd3e67b95ab Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:55 +0200
Subject: Template dsputil_template.c with respect to pixel size, etc.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/dsputil_template.c | 704 ++++++++++++++++++++++++------------------
 1 file changed, 398 insertions(+), 306 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 866817ce2e..f69c4671f9 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -29,18 +29,46 @@
 
 #include "dsputil.h"
 
+#define BIT_DEPTH 8
+
+#define pixel  uint8_t
+#define pixel2 uint16_t
+#define pixel4 uint32_t
+#define dctcoef int16_t
+
+#define FUNC(a)  a
+#define FUNCC(a) a ## _c
+#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+#define CLIP(a) cm[a]
+#define AV_RN2P AV_RN16
+#define AV_RN4P AV_RN32
+#define PIXEL_MAX ((1<<BIT_DEPTH)-1)
+
+#define no_rnd_avg_pixel4 no_rnd_avg32
+#define    rnd_avg_pixel4    rnd_avg32
+
 /* draw the edges of width 'w' of an image of size width, height */
 //FIXME check that this is ok for mpeg4 interlaced
-static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides)
+static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int sides)
 {
-    uint8_t *ptr, *last_line;
+    pixel *buf = (pixel*)_buf;
+    int wrap = _wrap / sizeof(pixel);
+    pixel *ptr, *last_line;
     int i;
 
     /* left and right */
     ptr = buf;
     for(i=0;i<height;i++) {
+#if BIT_DEPTH > 8
+        int j;
+        for (j = 0; j < w; j++) {
+            ptr[j-w] = ptr[0];
+            ptr[j+width] = ptr[width-1];
+        }
+#else
         memset(ptr - w, ptr[0], w);
         memset(ptr + width, ptr[width-1], w);
+#endif
         ptr += wrap;
     }
 
@@ -49,10 +77,10 @@ static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, i
     last_line = buf + (height - 1) * wrap;
     if (sides & EDGE_TOP)
         for(i = 0; i < w; i++)
-            memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top
+            memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
     if (sides & EDGE_BOTTOM)
         for (i = 0; i < w; i++)
-            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom
+            memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
 }
 
 /**
@@ -67,7 +95,7 @@ static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, i
  * @param w width of the source buffer
  * @param h height of the source buffer
  */
-void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
+void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                                     int src_x, int src_y, int w, int h){
     int x, y;
     int start_y, start_x, end_y, end_x;
@@ -80,10 +108,10 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
         src_y=1-block_h;
     }
     if(src_x>= w){
-        src+= (w-1-src_x);
+        src+= (w-1-src_x)*sizeof(pixel);
         src_x=w-1;
     }else if(src_x<=-block_w){
-        src+= (1-block_w-src_x);
+        src+= (1-block_w-src_x)*sizeof(pixel);
         src_x=1-block_w;
     }
 
@@ -95,18 +123,18 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
     assert(start_x < end_x && block_w);
 
     w    = end_x - start_x;
-    src += start_y*linesize + start_x;
-    buf += start_x;
+    src += start_y*linesize + start_x*sizeof(pixel);
+    buf += start_x*sizeof(pixel);
 
     //top
     for(y=0; y<start_y; y++){
-        memcpy(buf, src, w);
+        memcpy(buf, src, w*sizeof(pixel));
         buf += linesize;
     }
 
     // copy existing part
     for(; y<end_y; y++){
-        memcpy(buf, src, w);
+        memcpy(buf, src, w*sizeof(pixel));
         src += linesize;
         buf += linesize;
     }
@@ -114,28 +142,33 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo
     //bottom
     src -= linesize;
     for(; y<block_h; y++){
-        memcpy(buf, src, w);
+        memcpy(buf, src, w*sizeof(pixel));
         buf += linesize;
     }
 
-    buf -= block_h * linesize + start_x;
+    buf -= block_h * linesize + start_x*sizeof(pixel);
     while (block_h--){
+        pixel *bufp = (pixel*)buf;
        //left
         for(x=0; x<start_x; x++){
-            buf[x] = buf[start_x];
+            bufp[x] = bufp[start_x];
         }
 
        //right
         for(x=end_x; x<block_w; x++){
-            buf[x] = buf[end_x - 1];
+            bufp[x] = bufp[end_x - 1];
         }
         buf += linesize;
     }
 }
 
-static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
 {
     int i;
+    pixel *restrict pixels = (pixel *restrict)_pixels;
+    dctcoef *block = (dctcoef*)_block;
+    line_size /= sizeof(pixel);
+
     for(i=0;i<8;i++) {
         pixels[0] += block[0];
         pixels[1] += block[1];
@@ -150,9 +183,13 @@ static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_siz
     }
 }
 
-static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
 {
     int i;
+    pixel *restrict pixels = (pixel *restrict)_pixels;
+    dctcoef *block = (dctcoef*)_block;
+    line_size /= sizeof(pixel);
+
     for(i=0;i<4;i++) {
         pixels[0] += block[0];
         pixels[1] += block[1];
@@ -296,127 +333,128 @@ static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels
         }\
 }\
 \
-CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
+CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8*sizeof(pixel))
 
 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
 #else // 64 bit variant
 
 #define PIXOP2(OPNAME, OP) \
-static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
+        OP(*((pixel2*)(block  )), AV_RN2P(pixels  ));\
         pixels+=line_size;\
         block +=line_size;\
     }\
 }\
-static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        OP(*((pixel4*)(block  )), AV_RN4P(pixels  ));\
         pixels+=line_size;\
         block +=line_size;\
     }\
 }\
-static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
-        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
+        OP(*((pixel4*)(block                )), AV_RN4P(pixels                ));\
+        OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
         pixels+=line_size;\
         block +=line_size;\
     }\
 }\
-static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
 }\
 \
-static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN32(&src1[i*src_stride1  ]);\
-        b= AV_RN32(&src2[i*src_stride2  ]);\
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
-        a= AV_RN32(&src1[i*src_stride1+4]);\
-        b= AV_RN32(&src2[i*src_stride2+4]);\
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
+        pixel4 a,b;\
+        a= AV_RN4P(&src1[i*src_stride1  ]);\
+        b= AV_RN4P(&src2[i*src_stride2  ]);\
+        OP(*((pixel4*)&dst[i*dst_stride  ]), no_rnd_avg_pixel4(a, b));\
+        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
+        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
+        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
     }\
 }\
 \
-static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN32(&src1[i*src_stride1  ]);\
-        b= AV_RN32(&src2[i*src_stride2  ]);\
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
-        a= AV_RN32(&src1[i*src_stride1+4]);\
-        b= AV_RN32(&src2[i*src_stride2+4]);\
-        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
+        pixel4 a,b;\
+        a= AV_RN4P(&src1[i*src_stride1  ]);\
+        b= AV_RN4P(&src2[i*src_stride2  ]);\
+        OP(*((pixel4*)&dst[i*dst_stride  ]), rnd_avg_pixel4(a, b));\
+        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
+        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
+        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
     }\
 }\
 \
-static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN32(&src1[i*src_stride1  ]);\
-        b= AV_RN32(&src2[i*src_stride2  ]);\
-        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+        pixel4 a,b;\
+        a= AV_RN4P(&src1[i*src_stride1  ]);\
+        b= AV_RN4P(&src2[i*src_stride2  ]);\
+        OP(*((pixel4*)&dst[i*dst_stride  ]), rnd_avg_pixel4(a, b));\
     }\
 }\
 \
-static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
     int i;\
     for(i=0; i<h; i++){\
-        uint32_t a,b;\
-        a= AV_RN16(&src1[i*src_stride1  ]);\
-        b= AV_RN16(&src2[i*src_stride2  ]);\
-        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+        pixel4 a,b;\
+        a= AV_RN2P(&src1[i*src_stride1  ]);\
+        b= AV_RN2P(&src2[i*src_stride2  ]);\
+        OP(*((pixel2*)&dst[i*dst_stride  ]), rnd_avg_pixel4(a, b));\
     }\
 }\
 \
-static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
-    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
-    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+    FUNC(OPNAME ## _pixels8_l2)(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
 }\
 \
-static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
-    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
-    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
 }\
 \
-static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    /* FIXME HIGH BIT DEPTH */\
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a, b, c, d, l0, l1, h0, h1;\
@@ -451,24 +489,25 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, cons
     }\
 }\
 \
-static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
-    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
 }\
 \
-static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    /* FIXME HIGH BIT DEPTH*/\
     int i;\
     for(i=0; i<h; i++){\
         uint32_t a, b, c, d, l0, l1, h0, h1;\
@@ -502,20 +541,23 @@ static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src
         OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
     }\
 }\
-static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
-    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
-    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    FUNC(OPNAME ## _pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
 }\
-static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
-    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
-    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
 }\
 \
-static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
 {\
         int i, a0, b0, a1, b1;\
+        pixel *block = (pixel*)_block;\
+        const pixel *pixels = (const pixel*)_pixels;\
+        line_size /= sizeof(pixel);\
         a0= pixels[0];\
         b0= pixels[1] + 2;\
         a0 += b0;\
@@ -546,8 +588,9 @@ static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixel
         }\
 }\
 \
-static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
+        /* FIXME HIGH BIT DEPTH */\
         int i;\
         const uint32_t a= AV_RN32(pixels  );\
         const uint32_t b= AV_RN32(pixels+1);\
@@ -582,8 +625,9 @@ static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixel
         }\
 }\
 \
-static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
+    /* FIXME HIGH BIT DEPTH */\
     int j;\
     for(j=0; j<2; j++){\
         int i;\
@@ -623,8 +667,9 @@ static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixel
     }\
 }\
 \
-static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
 {\
+    /* FIXME HIGH BIT DEPTH */\
     int j;\
     for(j=0; j<2; j++){\
         int i;\
@@ -664,18 +709,17 @@ static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t
     }\
 }\
 \
-CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
-av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16)    , FUNCC(OPNAME ## _pixels8)    , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
+av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16)    , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
 
-#define op_avg(a, b) a = rnd_avg32(a, b)
+#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
 #endif
-
 #define op_put(a, b) a = b
 
 PIXOP2(avg, op_avg)
@@ -686,21 +730,24 @@ PIXOP2(put, op_put)
 #define put_no_rnd_pixels8_c  put_pixels8_c
 #define put_no_rnd_pixels16_c put_pixels16_c
 
-static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
-    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
+static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h);
 }
 
-static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
-    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
+static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
 }
 
 #define H264_CHROMA_MC(OPNAME, OP)\
-static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
     const int A=(8-x)*(8-y);\
     const int B=(  x)*(8-y);\
     const int C=(8-x)*(  y);\
     const int D=(  x)*(  y);\
     int i;\
+    stride /= sizeof(pixel);\
     \
     assert(x<8 && y<8 && x>=0 && y>=0);\
 \
@@ -723,12 +770,15 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
     }\
 }\
 \
-static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
     const int A=(8-x)*(8-y);\
     const int B=(  x)*(8-y);\
     const int C=(8-x)*(  y);\
     const int D=(  x)*(  y);\
     int i;\
+    stride /= sizeof(pixel);\
     \
     assert(x<8 && y<8 && x>=0 && y>=0);\
 \
@@ -755,12 +805,15 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
     }\
 }\
 \
-static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
     const int A=(8-x)*(8-y);\
     const int B=(  x)*(8-y);\
     const int C=(8-x)*(  y);\
     const int D=(  x)*(  y);\
     int i;\
+    stride /= sizeof(pixel);\
     \
     assert(x<8 && y<8 && x>=0 && y>=0);\
 \
@@ -804,10 +857,14 @@ H264_CHROMA_MC(avg_       , op_avg)
 #undef op_put
 
 #define H264_LOWPASS(OPNAME, OP, OP2) \
-static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
     const int h=2;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     for(i=0; i<h; i++)\
     {\
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
@@ -817,10 +874,14 @@ static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src,
     }\
 }\
 \
-static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
     const int w=2;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     for(i=0; i<w; i++)\
     {\
         const int srcB= src[-2*srcStride];\
@@ -837,39 +898,48 @@ static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src,
     }\
 }\
 \
-static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
     const int h=2;\
     const int w=2;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     src -= 2*srcStride;\
     for(i=0; i<h+5; i++)\
     {\
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
         tmp+=tmpStride;\
         src+=srcStride;\
     }\
     tmp -= tmpStride*(h+5-2);\
     for(i=0; i<w; i++)\
     {\
-        const int tmpB= tmp[-2*tmpStride];\
-        const int tmpA= tmp[-1*tmpStride];\
-        const int tmp0= tmp[0 *tmpStride];\
-        const int tmp1= tmp[1 *tmpStride];\
-        const int tmp2= tmp[2 *tmpStride];\
-        const int tmp3= tmp[3 *tmpStride];\
-        const int tmp4= tmp[4 *tmpStride];\
+        const int tmpB= tmp[-2*tmpStride] - pad;\
+        const int tmpA= tmp[-1*tmpStride] - pad;\
+        const int tmp0= tmp[0 *tmpStride] - pad;\
+        const int tmp1= tmp[1 *tmpStride] - pad;\
+        const int tmp2= tmp[2 *tmpStride] - pad;\
+        const int tmp3= tmp[3 *tmpStride] - pad;\
+        const int tmp4= tmp[4 *tmpStride] - pad;\
         OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
         OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
         dst++;\
         tmp++;\
     }\
 }\
-static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
     const int h=4;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     for(i=0; i<h; i++)\
     {\
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
@@ -881,10 +951,14 @@ static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstSt
     }\
 }\
 \
-static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
     const int w=4;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     for(i=0; i<w; i++)\
     {\
         const int srcB= src[-2*srcStride];\
@@ -905,33 +979,38 @@ static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstSt
     }\
 }\
 \
-static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
     const int h=4;\
     const int w=4;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     src -= 2*srcStride;\
     for(i=0; i<h+5; i++)\
     {\
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
-        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
-        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
         tmp+=tmpStride;\
         src+=srcStride;\
     }\
     tmp -= tmpStride*(h+5-2);\
     for(i=0; i<w; i++)\
     {\
-        const int tmpB= tmp[-2*tmpStride];\
-        const int tmpA= tmp[-1*tmpStride];\
-        const int tmp0= tmp[0 *tmpStride];\
-        const int tmp1= tmp[1 *tmpStride];\
-        const int tmp2= tmp[2 *tmpStride];\
-        const int tmp3= tmp[3 *tmpStride];\
-        const int tmp4= tmp[4 *tmpStride];\
-        const int tmp5= tmp[5 *tmpStride];\
-        const int tmp6= tmp[6 *tmpStride];\
+        const int tmpB= tmp[-2*tmpStride] - pad;\
+        const int tmpA= tmp[-1*tmpStride] - pad;\
+        const int tmp0= tmp[0 *tmpStride] - pad;\
+        const int tmp1= tmp[1 *tmpStride] - pad;\
+        const int tmp2= tmp[2 *tmpStride] - pad;\
+        const int tmp3= tmp[3 *tmpStride] - pad;\
+        const int tmp4= tmp[4 *tmpStride] - pad;\
+        const int tmp5= tmp[5 *tmpStride] - pad;\
+        const int tmp6= tmp[6 *tmpStride] - pad;\
         OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
         OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
         OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
@@ -941,10 +1020,14 @@ static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t
     }\
 }\
 \
-static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
     const int h=8;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     for(i=0; i<h; i++)\
     {\
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
@@ -960,10 +1043,14 @@ static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstSt
     }\
 }\
 \
-static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
     const int w=8;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     for(i=0; i<w; i++)\
     {\
         const int srcB= src[-2*srcStride];\
@@ -992,41 +1079,46 @@ static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstSt
     }\
 }\
 \
-static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
     const int h=8;\
     const int w=8;\
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+    INIT_CLIP\
     int i;\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    dstStride /= sizeof(pixel);\
+    srcStride /= sizeof(pixel);\
     src -= 2*srcStride;\
     for(i=0; i<h+5; i++)\
     {\
-        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
-        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
-        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
-        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
-        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
-        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
-        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
-        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
+        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
+        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
+        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
+        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
         tmp+=tmpStride;\
         src+=srcStride;\
     }\
     tmp -= tmpStride*(h+5-2);\
     for(i=0; i<w; i++)\
     {\
-        const int tmpB= tmp[-2*tmpStride];\
-        const int tmpA= tmp[-1*tmpStride];\
-        const int tmp0= tmp[0 *tmpStride];\
-        const int tmp1= tmp[1 *tmpStride];\
-        const int tmp2= tmp[2 *tmpStride];\
-        const int tmp3= tmp[3 *tmpStride];\
-        const int tmp4= tmp[4 *tmpStride];\
-        const int tmp5= tmp[5 *tmpStride];\
-        const int tmp6= tmp[6 *tmpStride];\
-        const int tmp7= tmp[7 *tmpStride];\
-        const int tmp8= tmp[8 *tmpStride];\
-        const int tmp9= tmp[9 *tmpStride];\
-        const int tmp10=tmp[10*tmpStride];\
+        const int tmpB= tmp[-2*tmpStride] - pad;\
+        const int tmpA= tmp[-1*tmpStride] - pad;\
+        const int tmp0= tmp[0 *tmpStride] - pad;\
+        const int tmp1= tmp[1 *tmpStride] - pad;\
+        const int tmp2= tmp[2 *tmpStride] - pad;\
+        const int tmp3= tmp[3 *tmpStride] - pad;\
+        const int tmp4= tmp[4 *tmpStride] - pad;\
+        const int tmp5= tmp[5 *tmpStride] - pad;\
+        const int tmp6= tmp[6 *tmpStride] - pad;\
+        const int tmp7= tmp[7 *tmpStride] - pad;\
+        const int tmp8= tmp[8 *tmpStride] - pad;\
+        const int tmp9= tmp[9 *tmpStride] - pad;\
+        const int tmp10=tmp[10*tmpStride] - pad;\
         OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
         OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
         OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
@@ -1040,175 +1132,175 @@ static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t
     }\
 }\
 \
-static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst                , src                , dstStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
     src += 8*srcStride;\
     dst += 8*dstStride;\
-    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst                , src                , dstStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
 }\
 \
-static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
-    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst                , src                , dstStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
     src += 8*srcStride;\
     dst += 8*dstStride;\
-    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
-    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst                , src                , dstStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
 }\
 \
-static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
-    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
-    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
     src += 8*srcStride;\
     dst += 8*dstStride;\
-    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
-    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
+    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
 }\
 
 #define H264_MC(OPNAME, SIZE) \
-static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
-    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
+static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
+    FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t half[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
-    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
+    FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t half[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t half[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t half[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfH[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfV[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 \
-static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t full[SIZE*(SIZE+5)];\
-    uint8_t * const full_mid= full + SIZE*2;\
-    int16_t tmp[SIZE*(SIZE+5)];\
-    uint8_t halfV[SIZE*SIZE];\
-    uint8_t halfHV[SIZE*SIZE];\
-    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
-    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
-    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
-    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel),  stride, SIZE + 5);\
+    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
 }\
 
-#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+#define op_avg(a, b)  a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
-#define op_put(a, b)  a = cm[((b) + 16)>>5]
-#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
-#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+#define op_put(a, b)  a = CLIP(((b) + 16)>>5)
+#define op2_avg(a, b)  a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
+#define op2_put(a, b)  a = CLIP(((b) + 512)>>10)
 
 H264_LOWPASS(put_       , op_put, op2_put)
 H264_LOWPASS(avg_       , op_avg, op2_avg)
@@ -1230,28 +1322,28 @@ H264_MC(avg_, 16)
 #define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
 #define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
 
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
-    put_pixels8_c(dst, src, stride, 8);
+void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(put_pixels8)(dst, src, stride, 8);
 }
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
-    avg_pixels8_c(dst, src, stride, 8);
+void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(avg_pixels8)(dst, src, stride, 8);
 }
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
-    put_pixels16_c(dst, src, stride, 16);
+void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(put_pixels16)(dst, src, stride, 16);
 }
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
-    avg_pixels16_c(dst, src, stride, 16);
+void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+    FUNCC(avg_pixels16)(dst, src, stride, 16);
 }
 
-static void clear_block_c(DCTELEM *block)
+static void FUNCC(clear_block)(DCTELEM *block)
 {
-    memset(block, 0, sizeof(DCTELEM)*64);
+    memset(block, 0, sizeof(dctcoef)*64);
 }
 
 /**
  * memset(blocks, 0, sizeof(DCTELEM)*6*64)
  */
-static void clear_blocks_c(DCTELEM *blocks)
+static void FUNCC(clear_blocks)(DCTELEM *blocks)
 {
-    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+    memset(blocks, 0, sizeof(dctcoef)*6*64);
 }
-- 
cgit v1.2.3


From 44ca80df3445a59bc065924d8c6110fa10367d01 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:56 +0200
Subject: Make the h264 loop filter bit depth aware.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264_loopfilter.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 35630658b5..a08ff69931 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -101,9 +101,10 @@ static const uint8_t tc0_table[52*3][4] = {
 };
 
 static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) {
-    const unsigned int index_a = qp + h->slice_alpha_c0_offset;
+    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
+    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
     const int alpha = alpha_table[index_a];
-    const int beta  = beta_table[qp + h->slice_beta_offset];
+    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
     if (alpha ==0 || beta == 0) return;
 
     if( bS[0] < 4 ) {
@@ -118,9 +119,10 @@ static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t
     }
 }
 static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
-    const unsigned int index_a = qp + h->slice_alpha_c0_offset;
+    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
+    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
     const int alpha = alpha_table[index_a];
-    const int beta  = beta_table[qp + h->slice_beta_offset];
+    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
     if (alpha ==0 || beta == 0) return;
 
     if( bS[0] < 4 ) {
@@ -136,9 +138,10 @@ static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t
 }
 
 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
-    int index_a = qp + h->slice_alpha_c0_offset;
+    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
+    int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
     int alpha = alpha_table[index_a];
-    int beta  = beta_table[qp + h->slice_beta_offset];
+    int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
     if (alpha ==0 || beta == 0) return;
 
     if( bS[0] < 4 ) {
@@ -153,9 +156,10 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int
     }
 }
 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
-    int index_a = qp + h->slice_alpha_c0_offset;
+    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
+    int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
     int alpha = alpha_table[index_a];
-    int beta  = beta_table[qp + h->slice_beta_offset];
+    int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
     if (alpha ==0 || beta == 0) return;
 
     if( bS[0] < 4 ) {
@@ -171,9 +175,10 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in
 }
 
 static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
-    const unsigned int index_a = qp + h->slice_alpha_c0_offset;
+    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
+    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
     const int alpha = alpha_table[index_a];
-    const int beta  = beta_table[qp + h->slice_beta_offset];
+    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
     if (alpha ==0 || beta == 0) return;
 
     if( bS[0] < 4 ) {
@@ -189,9 +194,10 @@ static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t
 }
 
 static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
-    const unsigned int index_a = qp + h->slice_alpha_c0_offset;
+    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
+    const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
     const int alpha = alpha_table[index_a];
-    const int beta  = beta_table[qp + h->slice_beta_offset];
+    const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
     if (alpha ==0 || beta == 0) return;
 
     if( bS[0] < 4 ) {
-- 
cgit v1.2.3


From 6e3ef511d787ff632547059f8730396ff4498e70 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:57 +0200
Subject: Add the notion of pixel size in h264 related functions.

In high bit depth the pixels will not be stored in uint8_t like in the
normal case, but in uint16_t. The pixel size is thus 1 in normal bit
depth and 2 in high bit depth.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 ffplay.c                     |   4 +-
 libavcodec/h264.c            | 323 +++++++++++++++++++++++++++++--------------
 libavcodec/h264.h            |   9 +-
 libavcodec/h264_cabac.c      | 105 +++++++-------
 libavcodec/h264_cavlc.c      |  84 ++++++-----
 libavcodec/h264_loopfilter.c |   6 +-
 libavcodec/utils.c           |   3 +-
 7 files changed, 338 insertions(+), 196 deletions(-)

(limited to 'libavcodec')

diff --git a/ffplay.c b/ffplay.c
index a09431915a..11b307eaca 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -1577,6 +1577,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
     int perms = AV_PERM_WRITE;
     int i, w, h, stride[4];
     unsigned edge;
+    int pixel_size;
 
     if (codec->codec->capabilities & CODEC_CAP_NEG_LINESIZES)
         perms |= AV_PERM_NEG_LINESIZES;
@@ -1598,6 +1599,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
     if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h)))
         return -1;
 
+    pixel_size = av_pix_fmt_descriptors[ref->format].comp[0].step_minus1+1;
     ref->video->w = codec->width;
     ref->video->h = codec->height;
     for(i = 0; i < 4; i ++) {
@@ -1605,7 +1607,7 @@ static int input_get_buffer(AVCodecContext *codec, AVFrame *pic)
         unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0;
 
         if (ref->data[i]) {
-            ref->data[i]    += (edge >> hshift) + ((edge * ref->linesize[i]) >> vshift);
+            ref->data[i]    += ((edge * pixel_size) >> hshift) + ((edge * ref->linesize[i]) >> vshift);
         }
         pic->data[i]     = ref->data[i];
         pic->linesize[i] = ref->linesize[i];
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 142a07807a..e02b277769 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -314,12 +314,13 @@ static void chroma_dc_dct_c(DCTELEM *block){
 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int src_x_offset, int src_y_offset,
-                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
+                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
+                           int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
     const int luma_xy= (mx&3) + ((my&3)<<2);
-    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
+    uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
     uint8_t * src_cb, * src_cr;
     int extra_width= h->emu_edge_width;
     int extra_height= h->emu_edge_height;
@@ -336,8 +337,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
        || full_my < 0-extra_height
        || full_mx + 16/*FIXME*/ > pic_width + extra_width
        || full_my + 16/*FIXME*/ > pic_height + extra_height){
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
-            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
+        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
         emu=1;
     }
 
@@ -353,8 +354,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
     }
-    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
-    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
+    src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
+    src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
 
     if(emu){
         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
@@ -374,14 +375,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           int list0, int list1){
+                           int list0, int list1, int pixel_shift){
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
 
-    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
-    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
-    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -389,7 +390,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op);
+                           qpix_op, chroma_op, pixel_shift);
 
         qpix_op=  qpix_avg;
         chroma_op= chroma_avg;
@@ -399,7 +400,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                           qpix_op, chroma_op);
+                           qpix_op, chroma_op, pixel_shift);
     }
 }
 
@@ -409,12 +410,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
-                           int list0, int list1){
+                           int list0, int list1, int pixel_shift){
     MpegEncContext * const s = &h->s;
 
-    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
-    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
-    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -422,17 +423,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
         uint8_t *tmp_cb = s->obmc_scratchpad;
-        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
+        uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift);
         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
         int refn0 = h->ref_cache[0][ scan8[n] ];
         int refn1 = h->ref_cache[1][ scan8[n] ];
 
         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                     dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                     tmp_y, tmp_cb, tmp_cr,
-                    x_offset, y_offset, qpix_put, chroma_put);
+                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift);
 
         if(h->use_weight == 2){
             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
@@ -457,7 +458,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         Picture *ref= &h->ref_list[list][refn];
         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
-                    qpix_put, chroma_put);
+                    qpix_put, chroma_put, pixel_shift);
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                        h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
@@ -476,19 +477,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
-                           int list0, int list1){
+                           int list0, int list1, int pixel_shift){
     if((h->use_weight==2 && list0 && list1
         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
        || h->use_weight==1)
         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
-                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
+                         weight_op[0], weight_op[3], weight_avg[0],
+                         weight_avg[3], list0, list1, pixel_shift);
     else
         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
-                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
+                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
+                    chroma_avg, list0, list1, pixel_shift);
 }
 
-static inline void prefetch_motion(H264Context *h, int list){
+static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){
     /* fetch pixels for estimated mv 4 macroblocks ahead
      * optimized for 64byte cache lines */
     MpegEncContext * const s = &h->s;
@@ -497,48 +500,54 @@ static inline void prefetch_motion(H264Context *h, int list){
         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
         uint8_t **src= h->ref_list[list][refn].data;
-        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
+        int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
         s->dsp.prefetch(src[0]+off, s->linesize, 4);
-        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
+        off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
     }
 }
 
-static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
-                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
+                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
+                      int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
 
     assert(IS_INTER(mb_type));
 
-    prefetch_motion(h, 0);
+    prefetch_motion(h, 0, pixel_shift);
 
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                 weight_op, weight_avg,
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
+                pixel_shift);
     }else if(IS_16X8(mb_type)){
-        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
+        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
+                pixel_shift);
+        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
-                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
+                pixel_shift);
     }else if(IS_8X16(mb_type)){
         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
-                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
+                pixel_shift);
         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                 &weight_op[2], &weight_avg[2],
-                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
+                pixel_shift);
     }else{
         int i;
 
@@ -554,25 +563,30 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                     &weight_op[3], &weight_avg[3],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
             }else if(IS_SUB_8X4(sub_mb_type)){
-                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
+                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
             }else if(IS_SUB_4X8(sub_mb_type)){
                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                     &weight_op[5], &weight_avg[5],
-                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                    pixel_shift);
             }else{
                 int j;
                 assert(IS_SUB_4X4(sub_mb_type));
@@ -582,15 +596,32 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                         &weight_op[6], &weight_avg[6],
-                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
+                        pixel_shift);
                 }
             }
         }
     }
 
-    prefetch_motion(h, 1);
+    prefetch_motion(h, 1, pixel_shift);
 }
 
+#define hl_motion_fn(sh, bits) \
+static av_always_inline void hl_motion_ ## bits(H264Context *h, \
+                                       uint8_t *dest_y, \
+                                       uint8_t *dest_cb, uint8_t *dest_cr, \
+                                       qpel_mc_func (*qpix_put)[16], \
+                                       h264_chroma_mc_func (*chroma_put), \
+                                       qpel_mc_func (*qpix_avg)[16], \
+                                       h264_chroma_mc_func (*chroma_avg), \
+                                       h264_weight_func *weight_op, \
+                                       h264_biweight_func *weight_avg) \
+{ \
+    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
+              qpix_avg, chroma_avg, weight_op, weight_avg, sh); \
+}
+hl_motion_fn(0, 8);
+hl_motion_fn(1, 16);
 
 static void free_tables(H264Context *h, int free_rbsp){
     int i;
@@ -758,8 +789,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
  * Allocate buffers which are not shared amongst multiple threads.
  */
 static int context_init(H264Context *h){
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
 
     h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
     h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
@@ -861,6 +892,8 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
 
     ff_h264_decode_init_vlc();
 
+    h->pixel_shift = 0;
+
     h->thread_context[0] = h;
     h->outputed_poc = INT_MIN;
     h->prev_poc_msb= 1<<16;
@@ -888,6 +921,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
 int ff_h264_frame_start(H264Context *h){
     MpegEncContext * const s = &h->s;
     int i;
+    const int pixel_shift = h->pixel_shift;
 
     if(MPV_frame_start(s, s->avctx) < 0)
         return -1;
@@ -904,14 +938,14 @@ int ff_h264_frame_start(H264Context *h){
     assert(s->linesize && s->uvlinesize);
 
     for(i=0; i<16; i++){
-        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     }
     for(i=0; i<4; i++){
         h->block_offset[16+i]=
-        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
         h->block_offset[24+16+i]=
-        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     }
 
     /* can't be in alloc_tables because linesize isn't known there.
@@ -945,6 +979,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
     MpegEncContext * const s = &h->s;
     uint8_t *top_border;
     int top_idx = 1;
+    const int pixel_shift = h->pixel_shift;
 
     src_y  -=   linesize;
     src_cb -= uvlinesize;
@@ -955,9 +990,16 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
             if(!MB_MBAFF){
                 top_border = h->top_borders[0][s->mb_x];
                 AV_COPY128(top_border, src_y + 15*linesize);
+                if (pixel_shift)
+                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                    if (pixel_shift) {
+                        AV_COPY128(top_border+32, src_cb+7*uvlinesize);
+                        AV_COPY128(top_border+48, src_cr+7*uvlinesize);
+                    } else {
                     AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                     AV_COPY64(top_border+24, src_cr+7*uvlinesize);
+                    }
                 }
             }
         }else if(MB_MBAFF){
@@ -970,14 +1012,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
     // There are two lines saved, the line above the the top macroblock of a pair,
     // and the line above the bottom macroblock
     AV_COPY128(top_border, src_y + 16*linesize);
+    if (pixel_shift)
+        AV_COPY128(top_border+16, src_y+16*linesize+16);
 
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+        if (pixel_shift) {
+            AV_COPY128(top_border+32, src_cb+8*uvlinesize);
+            AV_COPY128(top_border+48, src_cr+8*uvlinesize);
+        } else {
         AV_COPY64(top_border+16, src_cb+8*uvlinesize);
         AV_COPY64(top_border+24, src_cr+8*uvlinesize);
+        }
     }
 }
 
-static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
+static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
+                                  uint8_t *src_cb, uint8_t *src_cr,
+                                  int linesize, int uvlinesize,
+                                  int xchg, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     int deblock_left;
     int deblock_top;
@@ -1002,41 +1054,62 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
         deblock_top =  (s->mb_y > !!MB_FIELD);
     }
 
-    src_y  -=   linesize + 1;
-    src_cb -= uvlinesize + 1;
-    src_cr -= uvlinesize + 1;
+    src_y  -=   linesize + 1 + pixel_shift;
+    src_cb -= uvlinesize + 1 + pixel_shift;
+    src_cr -= uvlinesize + 1 + pixel_shift;
 
     top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
     top_border    = h->top_borders[top_idx][s->mb_x];
 
 #define XCHG(a,b,xchg)\
+    if (pixel_shift) {\
+        if (xchg) {\
+            AV_SWAP64(b+0,a+0);\
+            AV_SWAP64(b+8,a+8);\
+        } else {\
+            AV_COPY128(b,a); \
+        }\
+    } else \
 if (xchg) AV_SWAP64(b,a);\
 else      AV_COPY64(b,a);
 
     if(deblock_top){
         if(deblock_left){
-            XCHG(top_border_m1+8, src_y -7, 1);
+            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
         }
-        XCHG(top_border+0, src_y +1, xchg);
-        XCHG(top_border+8, src_y +9, 1);
+        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
+        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
         if(s->mb_x+1 < s->mb_width){
-            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
+            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
         }
     }
-
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
         if(deblock_top){
             if(deblock_left){
-                XCHG(top_border_m1+16, src_cb -7, 1);
-                XCHG(top_border_m1+24, src_cr -7, 1);
+                XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
+                XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
             }
-            XCHG(top_border+16, src_cb+1, 1);
-            XCHG(top_border+24, src_cr+1, 1);
+            XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
+            XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
         }
     }
 }
 
-static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
+static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
+    if (high_bit_depth) {
+        return AV_RN32A(((int32_t*)mb) + index);
+    } else
+        return AV_RN16A(mb + index);
+}
+
+static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
+    if (high_bit_depth) {
+        AV_WN32A(((int32_t*)mb) + index, value);
+    } else
+        AV_WN16A(mb + index, value);
+}
+
+static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
     const int mb_x= s->mb_x;
     const int mb_y= s->mb_y;
@@ -1052,12 +1125,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
 
-    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
-    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
-    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
+    dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
+    dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
+    dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
 
-    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
-    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
+    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
+    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
 
     h->list_counts[mb_xy]= h->list_count;
 
@@ -1094,6 +1167,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
     }
 
     if (!simple && IS_INTRA_PCM(mb_type)) {
+        if (pixel_shift) {
+            const int bit_depth = h->sps.bit_depth_luma;
+            int j;
+            GetBitContext gb;
+            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
+
+            for (i = 0; i < 16; i++) {
+                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
+                for (j = 0; j < 16; j++)
+                    tmp_y[j] = get_bits(&gb, bit_depth);
+            }
+            for (i = 0; i < 8; i++) {
+                uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
+                for (j = 0; j < 8; j++)
+                    tmp_cb[j] = get_bits(&gb, bit_depth);
+            }
+            for (i = 0; i < 8; i++) {
+                uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
+                for (j = 0; j < 8; j++)
+                    tmp_cr[j] = get_bits(&gb, bit_depth);
+            }
+        } else {
         for (i=0; i<16; i++) {
             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
         }
@@ -1101,10 +1196,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
         }
+        }
     } else {
         if(IS_INTRA(mb_type)){
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
 
             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
@@ -1125,16 +1221,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             uint8_t * const ptr= dest_y + block_offset[i];
                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
+                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
                             }else{
                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                             (h->topright_samples_available<<i)&0x4000, linesize);
                                 if(nnz){
-                                    if(nnz == 1 && h->mb[i*16])
-                                        idct_dc_add(ptr, h->mb + i*16, linesize);
+                                    if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
+                                        idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
                                     else
-                                        idct_add   (ptr, h->mb + i*16, linesize);
+                                        idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
                                 }
                             }
                         }
@@ -1151,18 +1247,24 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
 
                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
+                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
                             }else{
                                 uint8_t *topright;
                                 int nnz, tr;
+                                uint64_t tr_high;
                                 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                                     const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                                     assert(mb_y || linesize <= block_offset[i]);
                                     if(!topright_avail){
+                                        if (pixel_shift) {
+                                            tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
+                                            topright= (uint8_t*) &tr_high;
+                                        } else {
                                         tr= ptr[3 - linesize]*0x01010101;
                                         topright= (uint8_t*) &tr;
+                                        }
                                     }else
-                                        topright= ptr + 4 - linesize;
+                                        topright= ptr + (4 << pixel_shift) - linesize;
                                 }else
                                     topright= NULL;
 
@@ -1170,10 +1272,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                                 nnz = h->non_zero_count_cache[ scan8[i] ];
                                 if(nnz){
                                     if(is_h264){
-                                        if(nnz == 1 && h->mb[i*16])
-                                            idct_dc_add(ptr, h->mb + i*16, linesize);
+                                        if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
+                                            idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
                                         else
-                                            idct_add   (ptr, h->mb + i*16, linesize);
+                                            idct_add   (ptr, h->mb + (i*16 << pixel_shift), linesize);
                                     }else
                                         ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                                 }
@@ -1191,19 +1293,27 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                                     8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                             for(i = 0; i < 16; i++)
-                                h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
+                                dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
                         }
                     }
                 }else
                     ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
             }
             if(h->deblocking_filter)
-                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
         }else if(is_h264){
-            hl_motion(h, dest_y, dest_cb, dest_cr,
-                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                      h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
+            if (pixel_shift) {
+                hl_motion_16(h, dest_y, dest_cb, dest_cr,
+                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                             h->h264dsp.weight_h264_pixels_tab,
+                             h->h264dsp.biweight_h264_pixels_tab);
+            } else
+                hl_motion_8(h, dest_y, dest_cb, dest_cr,
+                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                            h->h264dsp.weight_h264_pixels_tab,
+                            h->h264dsp.biweight_h264_pixels_tab);
         }
 
 
@@ -1215,8 +1325,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
                         }else{
                             for(i=0; i<16; i++){
-                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
-                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
+                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
                             }
                         }
                     }else{
@@ -1228,7 +1338,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                         for(i=0; i<16; i+=di){
                             if(h->non_zero_count_cache[ scan8[i] ]){
-                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                                idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
                             }
                         }
                     }else{
@@ -1253,21 +1363,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
             uint8_t *dest[2] = {dest_cb, dest_cr};
             if(transform_bypass){
                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
                 }else{
                     idct_add = s->dsp.add_pixels4;
                     for(i=16; i<16+8; i++){
-                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
-                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                     }
                 }
             }else{
                 if(is_h264){
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
@@ -1291,15 +1401,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
 /**
  * Process a macroblock; this case avoids checks for expensive uncommon cases.
  */
-static void hl_decode_mb_simple(H264Context *h){
-    hl_decode_mb_internal(h, 1);
+#define hl_decode_mb_simple(sh, bits) \
+static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
+    hl_decode_mb_internal(h, 1, sh); \
 }
+hl_decode_mb_simple(0, 8);
+hl_decode_mb_simple(1, 16);
 
 /**
  * Process a macroblock; this handles edge cases, such as interlacing.
  */
 static void av_noinline hl_decode_mb_complex(H264Context *h){
-    hl_decode_mb_internal(h, 0);
+    hl_decode_mb_internal(h, 0, h->pixel_shift);
 }
 
 void ff_h264_hl_decode_mb(H264Context *h){
@@ -1308,9 +1421,12 @@ void ff_h264_hl_decode_mb(H264Context *h){
     const int mb_type= s->current_picture.mb_type[mb_xy];
     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
 
-    if (is_complex)
+    if (is_complex) {
         hl_decode_mb_complex(h);
-    else hl_decode_mb_simple(h);
+    } else if (h->pixel_shift) {
+        hl_decode_mb_simple_16(h);
+    } else
+        hl_decode_mb_simple_8(h);
 }
 
 static int pred_weight_table(H264Context *h){
@@ -2432,6 +2548,7 @@ static void loop_filter(H264Context *h){
     int linesize, uvlinesize, mb_x, mb_y;
     const int end_mb_y= s->mb_y + FRAME_MBAFF;
     const int old_slice_type= h->slice_type;
+    const int pixel_shift = h->pixel_shift;
 
     if(h->deblocking_filter) {
         for(mb_x= 0; mb_x<s->mb_width; mb_x++){
@@ -2447,9 +2564,9 @@ static void loop_filter(H264Context *h){
 
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
-                dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
-                dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
-                dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
+                dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
+                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
+                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
                     //FIXME simplify above
 
                 if (MB_FIELD) {
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 0a0ec36649..470d9bee63 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -265,6 +265,7 @@ typedef struct MMCO{
 typedef struct H264Context{
     MpegEncContext s;
     H264DSPContext h264dsp;
+    int pixel_shift;    ///< 0 for 8-bit H264, 1 for high-bit-depth H264
     int chroma_qp[2]; //QPc
 
     int qp_thresh;      ///< QP threshold to skip loopfilter
@@ -296,7 +297,7 @@ typedef struct H264Context{
     unsigned int top_samples_available;
     unsigned int topright_samples_available;
     unsigned int left_samples_available;
-    uint8_t (*top_borders[2])[16+2*8];
+    uint8_t (*top_borders[2])[(16+2*8)*2];
 
     /**
      * non zero coeff count cache.
@@ -406,9 +407,9 @@ typedef struct H264Context{
     GetBitContext *intra_gb_ptr;
     GetBitContext *inter_gb_ptr;
 
-    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
-    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16];
-    DCTELEM mb_padding[256];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
+    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
+    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
+    DCTELEM mb_padding[256*2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
 
     /**
      * Cabac
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 25215fabe8..61ad4493f9 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1100,47 +1100,54 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
         }
     }
 
-    do {
-        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
-
-        int j= scantable[index[--coeff_count]];
-
-        if( get_cabac( CC, ctx ) == 0 ) {
-            node_ctx = coeff_abs_level_transition[0][node_ctx];
-            if( is_dc ) {
-                block[j] = get_cabac_bypass_sign( CC, -1);
-            }else{
-                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
-            }
-        } else {
-            int coeff_abs = 2;
-            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
-            node_ctx = coeff_abs_level_transition[1][node_ctx];
-
-            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
-                coeff_abs++;
-            }
-
-            if( coeff_abs >= 15 ) {
-                int j = 0;
-                while( get_cabac_bypass( CC ) ) {
-                    j++;
-                }
-
-                coeff_abs=1;
-                while( j-- ) {
-                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
-                }
-                coeff_abs+= 14;
-            }
+#define STORE_BLOCK(type) \
+    do { \
+        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; \
+ \
+        int j= scantable[index[--coeff_count]]; \
+ \
+        if( get_cabac( CC, ctx ) == 0 ) { \
+            node_ctx = coeff_abs_level_transition[0][node_ctx]; \
+            if( is_dc ) { \
+                ((type*)block)[j] = get_cabac_bypass_sign( CC, -1); \
+            }else{ \
+                ((type*)block)[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; \
+            } \
+        } else { \
+            int coeff_abs = 2; \
+            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
+            node_ctx = coeff_abs_level_transition[1][node_ctx]; \
+\
+            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
+                coeff_abs++; \
+            } \
+\
+            if( coeff_abs >= 15 ) { \
+                int j = 0; \
+                while( get_cabac_bypass( CC ) ) { \
+                    j++; \
+                } \
+\
+                coeff_abs=1; \
+                while( j-- ) { \
+                    coeff_abs += coeff_abs + get_cabac_bypass( CC ); \
+                } \
+                coeff_abs+= 14; \
+            } \
+\
+            if( is_dc ) { \
+                ((type*)block)[j] = get_cabac_bypass_sign( CC, -coeff_abs ); \
+            }else{ \
+                ((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \
+            } \
+        } \
+    } while ( coeff_count );
 
-            if( is_dc ) {
-                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
-            }else{
-                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
-            }
-        }
-    } while( coeff_count );
+    if (h->pixel_shift) {
+        STORE_BLOCK(int32_t)
+    } else {
+        STORE_BLOCK(int16_t)
+    }
 #ifdef CABAC_ON_STACK
             h->cabac.range     = cc.range     ;
             h->cabac.low       = cc.low       ;
@@ -1196,6 +1203,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
     int mb_xy;
     int mb_type, partition_count, cbp = 0;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
+    const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
 
@@ -1304,6 +1312,7 @@ decode_intra_mb:
     h->slice_table[ mb_xy ]= h->slice_num;
 
     if(IS_INTRA_PCM(mb_type)) {
+        const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
         const uint8_t *ptr;
 
         // We assume these blocks are very rare so we do not optimize it.
@@ -1316,9 +1325,9 @@ decode_intra_mb:
         }
 
         // The pixels are stored in the same order as levels in h->mb array.
-        memcpy(h->mb, ptr, 256); ptr+=256;
+        memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
         if(CHROMA){
-            memcpy(h->mb+128, ptr, 128); ptr+=128;
+            memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
         }
 
         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
@@ -1652,13 +1661,15 @@ decode_intra_mb:
             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
             AV_ZERO128(h->mb_luma_dc+0);
             AV_ZERO128(h->mb_luma_dc+8);
+            AV_ZERO128(h->mb_luma_dc+16);
+            AV_ZERO128(h->mb_luma_dc+24);
             decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
 
             if( cbp&15 ) {
                 qmul = h->dequant4_coeff[0][s->qscale];
                 for( i = 0; i < 16; i++ ) {
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
-                    decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
+                    decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
                 }
             } else {
                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
@@ -1668,7 +1679,7 @@ decode_intra_mb:
             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
                 if( cbp & (1<<i8x8) ) {
                     if( IS_8x8DCT(mb_type) ) {
-                        decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
+                        decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
                     } else {
                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
@@ -1676,7 +1687,7 @@ decode_intra_mb:
                             const int index = 4*i8x8 + i4x4;
                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
 //START_TIMER
-                            decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
+                            decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
 //STOP_TIMER("decode_residual")
                         }
                     }
@@ -1691,7 +1702,7 @@ decode_intra_mb:
             int c;
             for( c = 0; c < 2; c++ ) {
                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
-                decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
+                decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
             }
         }
 
@@ -1702,7 +1713,7 @@ decode_intra_mb:
                 for( i = 0; i < 4; i++ ) {
                     const int index = 16 + 4 * c + i;
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
-                    decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
+                    decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
                 }
             }
         } else {
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 15d84ac32c..61b057176b 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -488,37 +488,44 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
     }
 
-    scantable += zeros_left + total_coeff - 1;
-    if(n >= LUMA_DC_BLOCK_INDEX){
-        block[*scantable] = level[0];
-        for(i=1;i<total_coeff && zeros_left > 0;i++) {
-            if(zeros_left < 7)
-                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
-            else
-                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
-            zeros_left -= run_before;
-            scantable -= 1 + run_before;
-            block[*scantable]= level[i];
-        }
-        for(;i<total_coeff;i++) {
-            scantable--;
-            block[*scantable]= level[i];
-        }
-    }else{
-        block[*scantable] = (level[0] * qmul[*scantable] + 32)>>6;
-        for(i=1;i<total_coeff && zeros_left > 0;i++) {
-            if(zeros_left < 7)
-                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
-            else
-                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
-            zeros_left -= run_before;
-            scantable -= 1 + run_before;
-            block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
-        }
-        for(;i<total_coeff;i++) {
-            scantable--;
-            block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
-        }
+#define STORE_BLOCK(type) \
+    scantable += zeros_left + total_coeff - 1; \
+    if(n >= LUMA_DC_BLOCK_INDEX){ \
+        ((type*)block)[*scantable] = level[0]; \
+        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
+            if(zeros_left < 7) \
+                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
+            else \
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
+            zeros_left -= run_before; \
+            scantable -= 1 + run_before; \
+            ((type*)block)[*scantable]= level[i]; \
+        } \
+        for(;i<total_coeff;i++) { \
+            scantable--; \
+            ((type*)block)[*scantable]= level[i]; \
+        } \
+    }else{ \
+        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
+        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
+            if(zeros_left < 7) \
+                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
+            else \
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
+            zeros_left -= run_before; \
+            scantable -= 1 + run_before; \
+            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
+        } \
+        for(;i<total_coeff;i++) { \
+            scantable--; \
+            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
+        } \
+    }
+
+    if (h->pixel_shift) {
+        STORE_BLOCK(int32_t)
+    } else {
+        STORE_BLOCK(int16_t)
     }
 
     if(zeros_left<0){
@@ -535,6 +542,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
     int partition_count;
     unsigned int mb_type, cbp;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
+    const int pixel_shift = h->pixel_shift;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
 
@@ -605,7 +613,7 @@ decode_intra_mb:
         align_get_bits(&s->gb);
 
         // The pixels are stored in the same order as levels in h->mb array.
-        for(x=0; x < (CHROMA ? 384 : 256); x++){
+        for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
         }
 
@@ -941,6 +949,8 @@ decode_intra_mb:
         if(IS_INTRA16x16(mb_type)){
             AV_ZERO128(h->mb_luma_dc+0);
             AV_ZERO128(h->mb_luma_dc+8);
+            AV_ZERO128(h->mb_luma_dc+16);
+            AV_ZERO128(h->mb_luma_dc+24);
             if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
                 return -1; //FIXME continue if partitioned and other return -1 too
             }
@@ -951,7 +961,7 @@ decode_intra_mb:
                 for(i8x8=0; i8x8<4; i8x8++){
                     for(i4x4=0; i4x4<4; i4x4++){
                         const int index= i4x4 + 4*i8x8;
-                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
+                        if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
                             return -1;
                         }
                     }
@@ -963,7 +973,7 @@ decode_intra_mb:
             for(i8x8=0; i8x8<4; i8x8++){
                 if(cbp & (1<<i8x8)){
                     if(IS_8x8DCT(mb_type)){
-                        DCTELEM *buf = &h->mb[64*i8x8];
+                        DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
                         uint8_t *nnz;
                         for(i4x4=0; i4x4<4; i4x4++){
                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
@@ -976,7 +986,7 @@ decode_intra_mb:
                         for(i4x4=0; i4x4<4; i4x4++){
                             const int index= i4x4 + 4*i8x8;
 
-                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
+                            if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
                                 return -1;
                             }
                         }
@@ -990,7 +1000,7 @@ decode_intra_mb:
 
         if(cbp&0x30){
             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
+                if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
                     return -1;
                 }
         }
@@ -1000,7 +1010,7 @@ decode_intra_mb:
                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                 for(i4x4=0; i4x4<4; i4x4++){
                     const int index= 16 + 4*chroma_idx + i4x4;
-                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
+                    if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
                         return -1;
                     }
                 }
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index a08ff69931..72b1905936 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -544,10 +544,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
         if( dir == 0 ) {
-            filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h );
+            filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
             if( (edge&1) == 0 ) {
-                filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h);
+                filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
             }
         } else {
             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index e356ca08cb..9a19cde9cc 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -273,6 +273,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
         int unaligned;
         AVPicture picture;
         int stride_align[4];
+        const int pixel_size = av_pix_fmt_descriptors[s->pix_fmt].comp[0].step_minus1+1;
 
         avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
 
@@ -322,7 +323,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
             if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2])
                 buf->data[i] = buf->base[i];
             else
-                buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), stride_align[i]);
+                buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (pixel_size*EDGE_WIDTH>>h_shift), stride_align[i]);
         }
         if(size[1] && !size[2])
             ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt);
-- 
cgit v1.2.3


From fcc0224e4fbd44ae268903185b0cf83560b13555 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:58 +0200
Subject: Add support for higher QP values in h264.

In high bit depth, the QP values may now be up to (51 + 6*(bit_depth-8)).

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264.c       | 19 +++++++++++--------
 libavcodec/h264.h       |  7 ++++---
 libavcodec/h264_cabac.c | 12 +++++++-----
 libavcodec/h264_cavlc.c |  9 +++++----
 libavcodec/h264_ps.c    | 47 +++++++++++++++++++++++++++++++++++------------
 5 files changed, 62 insertions(+), 32 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index e02b277769..0fcb7dbbdb 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -44,12 +44,12 @@
 //#undef NDEBUG
 #include <assert.h>
 
-static const uint8_t rem6[52]={
-0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+static const uint8_t rem6[QP_MAX_NUM+1]={
+0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 };
 
-static const uint8_t div6[52]={
-0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+static const uint8_t div6[QP_MAX_NUM+1]={
+0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
 };
 
 static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
@@ -658,6 +658,7 @@ static void free_tables(H264Context *h, int free_rbsp){
 
 static void init_dequant8_coeff_table(H264Context *h){
     int i,q,x;
+    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
     h->dequant8_coeff[0] = h->dequant8_buffer[0];
     h->dequant8_coeff[1] = h->dequant8_buffer[1];
 
@@ -667,7 +668,7 @@ static void init_dequant8_coeff_table(H264Context *h){
             break;
         }
 
-        for(q=0; q<52; q++){
+        for(q=0; q<max_qp+1; q++){
             int shift = div6[q];
             int idx = rem6[q];
             for(x=0; x<64; x++)
@@ -680,6 +681,7 @@ static void init_dequant8_coeff_table(H264Context *h){
 
 static void init_dequant4_coeff_table(H264Context *h){
     int i,j,q,x;
+    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
     for(i=0; i<6; i++ ){
         h->dequant4_coeff[i] = h->dequant4_buffer[i];
         for(j=0; j<i; j++){
@@ -691,7 +693,7 @@ static void init_dequant4_coeff_table(H264Context *h){
         if(j<i)
             continue;
 
-        for(q=0; q<52; q++){
+        for(q=0; q<max_qp+1; q++){
             int shift = div6[q] + 2;
             int idx = rem6[q];
             for(x=0; x<16; x++)
@@ -893,6 +895,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     ff_h264_decode_init_vlc();
 
     h->pixel_shift = 0;
+    h->sps.bit_depth_luma = 8;
 
     h->thread_context[0] = h;
     h->outputed_poc = INT_MIN;
@@ -1387,7 +1390,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
                     for(i=16; i<16+8; i++){
                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
-                            ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
+                            ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                         }
                     }
                 }
@@ -2185,7 +2188,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
     h->last_qscale_diff = 0;
     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
-    if(tmp>51){
+    if(tmp>51+6*(h->sps.bit_depth_luma-8)){
         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
         return -1;
     }
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 470d9bee63..035927a09a 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -108,6 +108,7 @@
  */
 #define DELAYED_PIC_REF 4
 
+#define QP_MAX_NUM (51 + 2*6)           // The maximum supported qp
 
 /* NAL unit types */
 enum {
@@ -354,8 +355,8 @@ typedef struct H264Context{
      */
     PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
 
-    uint32_t dequant4_buffer[6][52][16]; //FIXME should these be moved down?
-    uint32_t dequant8_buffer[2][52][64];
+    uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down?
+    uint32_t dequant8_buffer[2][QP_MAX_NUM+1][64];
     uint32_t (*dequant4_coeff[6])[16];
     uint32_t (*dequant8_coeff[2])[64];
 
@@ -601,7 +602,7 @@ typedef struct H264Context{
 }H264Context;
 
 
-extern const uint8_t ff_h264_chroma_qp[52];
+extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10).
 
 /**
  * Decode SEI
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 61ad4493f9..69af1e2ded 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -689,13 +689,14 @@ void ff_h264_init_cabac_states(H264Context *h) {
     MpegEncContext * const s = &h->s;
     int i;
     const int8_t (*tab)[2];
+    const int slice_qp = av_clip(s->qscale - 6*(h->sps.bit_depth_luma-8), 0, 51);
 
     if( h->slice_type_nos == AV_PICTURE_TYPE_I ) tab = cabac_context_init_I;
     else                                 tab = cabac_context_init_PB[h->cabac_init_idc];
 
     /* calculate pre-state */
     for( i= 0; i < 460; i++ ) {
-        int pre = 2*(((tab[i][0] * s->qscale) >>4 ) + tab[i][1]) - 127;
+        int pre = 2*(((tab[i][0] * slice_qp) >>4 ) + tab[i][1]) - 127;
 
         pre^= pre>>31;
         if(pre > 124)
@@ -1631,11 +1632,12 @@ decode_intra_mb:
         if(get_cabac_noinline( &h->cabac, &h->cabac_state[60 + (h->last_qscale_diff != 0)])){
             int val = 1;
             int ctx= 2;
+            const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
 
             while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
                 ctx= 3;
                 val++;
-                if(val > 102){ //prevent infinite loop
+                if(val > 2*max_qp){ //prevent infinite loop
                     av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
                     return -1;
                 }
@@ -1647,9 +1649,9 @@ decode_intra_mb:
                 val= -((val + 1)>>1);
             h->last_qscale_diff = val;
             s->qscale += val;
-            if(((unsigned)s->qscale) > 51){
-                if(s->qscale<0) s->qscale+= 52;
-                else            s->qscale-= 52;
+            if(((unsigned)s->qscale) > max_qp){
+                if(s->qscale<0) s->qscale+= max_qp+1;
+                else            s->qscale-= max_qp+1;
             }
             h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
             h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 61b057176b..2e5ea54679 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -922,6 +922,7 @@ decode_intra_mb:
         int dquant;
         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
         const uint8_t *scan, *scan8x8;
+        const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
 
         if(IS_INTERLACED(mb_type)){
             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
@@ -935,10 +936,10 @@ decode_intra_mb:
 
         s->qscale += dquant;
 
-        if(((unsigned)s->qscale) > 51){
-            if(s->qscale<0) s->qscale+= 52;
-            else            s->qscale-= 52;
-            if(((unsigned)s->qscale) > 51){
+        if(((unsigned)s->qscale) > max_qp){
+            if(s->qscale<0) s->qscale+= max_qp+1;
+            else            s->qscale-= max_qp+1;
+            if(((unsigned)s->qscale) > max_qp){
                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
                 return -1;
             }
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index 44b83817d4..a98f14aaf6 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -57,11 +57,32 @@ static const AVRational pixel_aspect[17]={
  {2, 1},
 };
 
-const uint8_t ff_h264_chroma_qp[52]={
-    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,
-   12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
-   28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,
-   37,38,38,38,39,39,39,39
+#define QP(qP,depth) ( (qP)+6*((depth)-8) )
+
+#define CHROMA_QP_TABLE_END(d) \
+     QP(0,d),  QP(1,d),  QP(2,d),  QP(3,d),  QP(4,d),  QP(5,d),\
+     QP(6,d),  QP(7,d),  QP(8,d),  QP(9,d), QP(10,d), QP(11,d),\
+    QP(12,d), QP(13,d), QP(14,d), QP(15,d), QP(16,d), QP(17,d),\
+    QP(18,d), QP(19,d), QP(20,d), QP(21,d), QP(22,d), QP(23,d),\
+    QP(24,d), QP(25,d), QP(26,d), QP(27,d), QP(28,d), QP(29,d),\
+    QP(29,d), QP(30,d), QP(31,d), QP(32,d), QP(32,d), QP(33,d),\
+    QP(34,d), QP(34,d), QP(35,d), QP(35,d), QP(36,d), QP(36,d),\
+    QP(37,d), QP(37,d), QP(37,d), QP(38,d), QP(38,d), QP(38,d),\
+    QP(39,d), QP(39,d), QP(39,d), QP(39,d)
+
+const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1] = {
+    {
+        CHROMA_QP_TABLE_END(8)
+    },
+    {
+        0, 1, 2, 3, 4, 5,
+        CHROMA_QP_TABLE_END(9)
+    },
+    {
+        0, 1, 2, 3,  4,  5,
+        6, 7, 8, 9, 10, 11,
+        CHROMA_QP_TABLE_END(10)
+    },
 };
 
 static const uint8_t default_scaling4[2][16]={
@@ -419,17 +440,19 @@ fail:
 }
 
 static void
-build_qp_table(PPS *pps, int t, int index)
+build_qp_table(PPS *pps, int t, int index, const int depth)
 {
     int i;
-    for(i = 0; i < 52; i++)
-        pps->chroma_qp_table[t][i] = ff_h264_chroma_qp[av_clip(i + index, 0, 51)];
+    const int max_qp = 51 + 6*(depth-8);
+    for(i = 0; i < max_qp+1; i++)
+        pps->chroma_qp_table[t][i] = ff_h264_chroma_qp[depth-8][av_clip(i + index, 0, max_qp)];
 }
 
 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
     MpegEncContext * const s = &h->s;
     unsigned int pps_id= get_ue_golomb(&s->gb);
     PPS *pps;
+    const int qp_bd_offset = 6*(h->sps.bit_depth_luma-8);
 
     if(pps_id >= MAX_PPS_COUNT) {
         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
@@ -494,8 +517,8 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
 
     pps->weighted_pred= get_bits1(&s->gb);
     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
-    pps->init_qp= get_se_golomb(&s->gb) + 26;
-    pps->init_qs= get_se_golomb(&s->gb) + 26;
+    pps->init_qp= get_se_golomb(&s->gb) + 26 + qp_bd_offset;
+    pps->init_qs= get_se_golomb(&s->gb) + 26 + qp_bd_offset;
     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
     pps->constrained_intra_pred= get_bits1(&s->gb);
@@ -514,8 +537,8 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
     }
 
-    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
-    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
+    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0], h->sps.bit_depth_luma);
+    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1], h->sps.bit_depth_luma);
     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
         pps->chroma_qp_diff= 1;
 
-- 
cgit v1.2.3


From 19a0729b4cfacfd90b8ee84ab0c093ff7e397e65 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:48:59 +0200
Subject: Adds 8-, 9- and 10-bit versions of some of the functions used by the
 h264 decoder.

This patch lets e.g. dsputil_init chose dsp functions with respect to
the bit depth to decode. The naming scheme of bit depth dependent
functions is <base name>_<bit depth>[_<prefix>] (i.e. the old
clear_blocks_c is now named clear_blocks_8_c).

Note: Some of the functions for high bit depth is not dependent on the
bit depth, but only on the pixel size. This leaves some room for
optimizing binary size.

Preparatory patch for high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/alpha/dsputil_alpha.c     |   4 +
 libavcodec/arm/dsputil_init_arm.c    |   4 +
 libavcodec/arm/dsputil_init_armv6.c  |   4 +
 libavcodec/arm/dsputil_init_neon.c   |   6 +
 libavcodec/arm/dsputil_iwmmxt.c      |   3 +
 libavcodec/arm/h264dsp_init_arm.c    |   8 +-
 libavcodec/arm/h264pred_init_arm.c   |  11 +-
 libavcodec/bfin/dsputil_bfin.c       |   5 +
 libavcodec/dsputil.c                 | 282 +++++++++++++++++++++--------------
 libavcodec/dsputil.h                 |  71 ++++++---
 libavcodec/dsputil_template.c        |  80 +++++++---
 libavcodec/h264.c                    |  22 ++-
 libavcodec/h264dsp.c                 | 120 +++++++++------
 libavcodec/h264dsp.h                 |   8 +-
 libavcodec/h264dsp_template.c        |   5 +-
 libavcodec/h264idct.c                |  10 ++
 libavcodec/h264idct_template.c       |   8 +-
 libavcodec/h264pred.c                | 278 +++++++++++++++++++---------------
 libavcodec/h264pred.h                |   6 +-
 libavcodec/h264pred_template.c       |  16 +-
 libavcodec/high_bit_depth.h          |  85 +++++++++++
 libavcodec/mlib/dsputil_mlib.c       |   4 +
 libavcodec/ppc/dsputil_altivec.c     |   5 +
 libavcodec/ppc/dsputil_ppc.c         |   4 +
 libavcodec/ppc/h264_altivec.c        |   7 +-
 libavcodec/ps2/dsputil_mmi.c         |   3 +
 libavcodec/rv34.c                    |   2 +-
 libavcodec/sh4/dsputil_align.c       |   8 +
 libavcodec/sh4/dsputil_sh4.c         |   2 +
 libavcodec/sparc/dsputil_vis.c       |   3 +
 libavcodec/vp8.c                     |   2 +-
 libavcodec/x86/dsputil_mmx.c         |  32 ++++
 libavcodec/x86/h264_intrapred_init.c |   6 +-
 libavcodec/x86/h264dsp_mmx.c         |   4 +-
 34 files changed, 753 insertions(+), 365 deletions(-)
 create mode 100644 libavcodec/high_bit_depth.h

(limited to 'libavcodec')

diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 610f92acbb..32bb0fc932 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -270,6 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
 
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
     c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
     c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
@@ -311,6 +314,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
     c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
 
     c->clear_blocks = clear_blocks_axp;
+    }
 
     /* amask clears all bits that correspond to present features.  */
     if (amask(AMASK_MVI) == 0) {
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 92796c3145..777a2f954e 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -75,6 +75,8 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
 
 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     ff_put_pixels_clamped = c->put_pixels_clamped;
     ff_add_pixels_clamped = c->add_pixels_clamped;
 
@@ -95,6 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
 
     c->add_pixels_clamped = ff_add_pixels_clamped_arm;
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
@@ -112,6 +115,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
     c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
     c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
     c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
+    }
 
     if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
     if (HAVE_ARMV6)   ff_dsputil_init_armv6(c, avctx);
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 362050c6ea..7584aeefc6 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -72,6 +72,8 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
 
 void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO ||
                            avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
         c->idct_put              = ff_simple_idct_put_armv6;
@@ -80,6 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
         c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
     }
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
@@ -100,6 +103,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
 
     c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
+    }
 
     c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
     c->get_pixels = ff_get_pixels_armv6;
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index cd58011a2c..3bc053c864 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -173,6 +173,8 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
 
 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     if (!avctx->lowres) {
         if (avctx->idct_algo == FF_IDCT_AUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLENEON) {
@@ -190,6 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
         }
     }
 
+    if (!high_bit_depth) {
     c->clear_block  = ff_clear_block_neon;
     c->clear_blocks = ff_clear_blocks_neon;
 
@@ -213,12 +216,14 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 
     c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
+    }
 
     c->add_pixels_clamped = ff_add_pixels_clamped_neon;
     c->put_pixels_clamped = ff_put_pixels_clamped_neon;
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
 
     if (CONFIG_H264_DECODER) {
+        if (!high_bit_depth) {
         c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
         c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
         c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
@@ -294,6 +299,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
         c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
         c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
         c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
+        }
     }
 
     if (CONFIG_VP3_DECODER) {
diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
index e83edb5bb8..86f8fddfcf 100644
--- a/libavcodec/arm/dsputil_iwmmxt.c
+++ b/libavcodec/arm/dsputil_iwmmxt.c
@@ -155,6 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
     if (avctx->dsp_mask) {
         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -167,6 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
 
     c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
 
+    if (!high_bit_depth) {
     c->clear_blocks = clear_blocks_iwmmxt;
 
     c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
@@ -204,4 +206,5 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
     c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
     c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
     c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
+    }
 }
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 20f5ac2076..c2399e50ff 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -92,8 +92,9 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
                              DCTELEM *block, int stride,
                              const uint8_t nnzc[6*8]);
 
-static void ff_h264dsp_init_neon(H264DSPContext *c)
+static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
 {
+    if (bit_depth == 8) {
     c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
     c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
     c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
@@ -125,9 +126,10 @@ static void ff_h264dsp_init_neon(H264DSPContext *c)
     c->h264_idct8_add       = ff_h264_idct8_add_neon;
     c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
     c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
+    }
 }
 
-void ff_h264dsp_init_arm(H264DSPContext *c)
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
 {
-    if (HAVE_NEON) ff_h264dsp_init_neon(c);
+    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
 }
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index 3f1c5c64fd..cae32d7567 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -42,8 +42,13 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
 void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
 void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
 
-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
 {
+    const int high_depth = bit_depth > 8;
+
+    if (high_depth)
+        return;
+
     h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
     h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
     if (codec_id != CODEC_ID_VP8)
@@ -69,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
         h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
 }
 
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id)
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, bit_depth)
 {
-    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id);
+    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth);
 }
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index 65d0308018..0db2d8baf8 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -197,11 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
 
 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     c->get_pixels         = ff_bfin_get_pixels;
     c->diff_pixels        = ff_bfin_diff_pixels;
     c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
     c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
 
+    if (!high_bit_depth)
     c->clear_blocks       = bfin_clear_blocks;
     c->pix_sum            = ff_bfin_pix_sum;
     c->pix_norm1          = ff_bfin_pix_norm1;
@@ -228,6 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
     c->sse[1] = ff_bfin_sse8;
     c->sse[2] = ff_bfin_sse4;
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = bfin_put_pixels16;
     c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
     c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
@@ -247,6 +251,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
     c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
     c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
 /*     c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
+    }
 
     if (avctx->dct_algo == FF_DCT_AUTO)
         c->fdct               = ff_bfin_fdct;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 078e172d00..4389289d82 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -43,6 +43,15 @@
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
 
+#define BIT_DEPTH 9
+#include "dsputil_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "dsputil_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 8
 #include "dsputil_template.c"
 
 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
@@ -619,10 +628,10 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
 
 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     switch(width){
-    case 2: put_pixels2_c (dst, src, stride, height); break;
-    case 4: put_pixels4_c (dst, src, stride, height); break;
-    case 8: put_pixels8_c (dst, src, stride, height); break;
-    case 16:put_pixels16_c(dst, src, stride, height); break;
+    case 2: put_pixels2_8_c (dst, src, stride, height); break;
+    case 4: put_pixels4_8_c (dst, src, stride, height); break;
+    case 8: put_pixels8_8_c (dst, src, stride, height); break;
+    case 16:put_pixels16_8_c(dst, src, stride, height); break;
     }
 }
 
@@ -716,10 +725,10 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
 
 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     switch(width){
-    case 2: avg_pixels2_c (dst, src, stride, height); break;
-    case 4: avg_pixels4_c (dst, src, stride, height); break;
-    case 8: avg_pixels8_c (dst, src, stride, height); break;
-    case 16:avg_pixels16_c(dst, src, stride, height); break;
+    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
+    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
+    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
+    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
     }
 }
 
@@ -953,7 +962,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst
 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
-    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
 }\
 \
 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -963,7 +972,7 @@ static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
-    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
 }\
 \
 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -971,7 +980,7 @@ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
-    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
 }\
 \
 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -985,7 +994,7 @@ static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
-    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -996,7 +1005,7 @@ void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1004,9 +1013,9 @@ static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1017,7 +1026,7 @@ void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1025,9 +1034,9 @@ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1038,7 +1047,7 @@ void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1046,9 +1055,9 @@ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1059,7 +1068,7 @@ void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1067,23 +1076,23 @@ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[64];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[72];\
     uint8_t halfHV[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[72];\
     uint8_t halfHV[64];\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
@@ -1094,14 +1103,14 @@ void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
     uint8_t halfH[72];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
 }\
 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1113,14 +1122,14 @@ void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
-    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
 }\
 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[16*9];\
     uint8_t halfH[72];\
     copy_block9(full, src, 16, stride, 9);\
     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
-    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
 }\
 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1132,7 +1141,7 @@ static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
-    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
 }\
 \
 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1142,7 +1151,7 @@ static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
-    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
 }\
 \
 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1150,7 +1159,7 @@ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
-    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
 }\
 \
 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1164,7 +1173,7 @@ static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t half[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
-    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1175,7 +1184,7 @@ void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1183,9 +1192,9 @@ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1196,7 +1205,7 @@ void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1204,9 +1213,9 @@ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1217,7 +1226,7 @@ void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1225,9 +1234,9 @@ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1238,7 +1247,7 @@ void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1246,23 +1255,23 @@ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfHV[256];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[272];\
     uint8_t halfHV[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t halfH[272];\
     uint8_t halfHV[256];\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
@@ -1273,14 +1282,14 @@ void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
     uint8_t halfH[272];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
 }\
 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1292,14 +1301,14 @@ void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
-    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
 }\
 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[24*17];\
     uint8_t halfH[272];\
     copy_block17(full, src, 24, stride, 17);\
     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
-    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
 }\
 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1327,7 +1336,7 @@ QPEL_MC(0, avg_       , _       , op_avg)
 #define put_qpel16_mc00_c ff_put_pixels16x16_c
 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
 #define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
-#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
+#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
 
 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -1349,16 +1358,16 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
 
 #if CONFIG_RV40_DECODER
 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    put_pixels16_xy2_c(dst, src, stride, 16);
+    put_pixels16_xy2_8_c(dst, src, stride, 16);
 }
 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    avg_pixels16_xy2_c(dst, src, stride, 16);
+    avg_pixels16_xy2_8_c(dst, src, stride, 16);
 }
 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    put_pixels8_xy2_c(dst, src, stride, 8);
+    put_pixels8_xy2_8_c(dst, src, stride, 8);
 }
 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
-    avg_pixels8_xy2_c(dst, src, stride, 8);
+    avg_pixels8_xy2_8_c(dst, src, stride, 8);
 }
 #endif /* CONFIG_RV40_DECODER */
 
@@ -1394,7 +1403,7 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t half[64];
     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
-    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
 }
 
 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
@@ -1404,7 +1413,7 @@ static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t half[64];
     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
-    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
 }
 
 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
@@ -1418,7 +1427,7 @@ static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
     wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
-    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
 }
 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t halfH[88];
@@ -1427,7 +1436,7 @@ static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
     wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
-    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
 }
 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
     uint8_t halfH[88];
@@ -2863,8 +2872,24 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
             c->idct_put= ff_jref_idct4_put;
             c->idct_add= ff_jref_idct4_add;
         }else{
-            c->idct_put= ff_h264_lowres_idct_put_c;
-            c->idct_add= ff_h264_lowres_idct_add_c;
+            if (avctx->codec_id != CODEC_ID_H264) {
+                c->idct_put= ff_h264_lowres_idct_put_8_c;
+                c->idct_add= ff_h264_lowres_idct_add_8_c;
+            } else {
+                switch (avctx->bits_per_raw_sample) {
+                    case 9:
+                        c->idct_put= ff_h264_lowres_idct_put_9_c;
+                        c->idct_add= ff_h264_lowres_idct_add_9_c;
+                        break;
+                    case 10:
+                        c->idct_put= ff_h264_lowres_idct_put_10_c;
+                        c->idct_add= ff_h264_lowres_idct_add_10_c;
+                        break;
+                    default:
+                        c->idct_put= ff_h264_lowres_idct_put_8_c;
+                        c->idct_add= ff_h264_lowres_idct_add_8_c;
+                }
+            }
         }
         c->idct    = j_rev_dct4;
         c->idct_permutation_type= FF_NO_IDCT_PERM;
@@ -2922,14 +2947,9 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
     c->put_pixels_nonclamped = put_pixels_nonclamped_c;
     c->add_pixels_clamped = ff_add_pixels_clamped_c;
-    c->add_pixels8 = add_pixels8_c;
-    c->add_pixels4 = add_pixels4_c;
     c->sum_abs_dctelem = sum_abs_dctelem_c;
-    c->emulated_edge_mc = ff_emulated_edge_mc;
     c->gmc1 = gmc1_c;
     c->gmc = ff_gmc_c;
-    c->clear_block = clear_block_c;
-    c->clear_blocks = clear_blocks_c;
     c->pix_sum = pix_sum_c;
     c->pix_norm1 = pix_norm1_c;
 
@@ -2947,30 +2967,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->pix_abs[1][2] = pix_abs8_y2_c;
     c->pix_abs[1][3] = pix_abs8_xy2_c;
 
-#define dspfunc(PFX, IDX, NUM) \
-    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
-    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
-    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
-    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
-
-    dspfunc(put, 0, 16);
-    dspfunc(put_no_rnd, 0, 16);
-    dspfunc(put, 1, 8);
-    dspfunc(put_no_rnd, 1, 8);
-    dspfunc(put, 2, 4);
-    dspfunc(put, 3, 2);
-
-    dspfunc(avg, 0, 16);
-    dspfunc(avg_no_rnd, 0, 16);
-    dspfunc(avg, 1, 8);
-    dspfunc(avg_no_rnd, 1, 8);
-    dspfunc(avg, 2, 4);
-    dspfunc(avg, 3, 2);
-#undef dspfunc
-
-    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
-    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
-
     c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
     c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
     c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
@@ -3021,23 +3017,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     dspfunc(avg_qpel, 1, 8);
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
 
-    dspfunc(put_h264_qpel, 0, 16);
-    dspfunc(put_h264_qpel, 1, 8);
-    dspfunc(put_h264_qpel, 2, 4);
-    dspfunc(put_h264_qpel, 3, 2);
-    dspfunc(avg_h264_qpel, 0, 16);
-    dspfunc(avg_h264_qpel, 1, 8);
-    dspfunc(avg_h264_qpel, 2, 4);
-
 #undef dspfunc
-    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
-    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
-    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
-    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
-    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
-    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
-
-    c->draw_edges = draw_edges_c;
 
 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
     ff_mlp_init(c, avctx);
@@ -3162,6 +3142,92 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
     memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
 
+#undef FUNC
+#undef FUNCC
+#define FUNC(f, depth) f ## _ ## depth
+#define FUNCC(f, depth) f ## _ ## depth ## _c
+
+#define dspfunc1(PFX, IDX, NUM, depth)\
+    c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
+    c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
+    c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
+    c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
+
+#define dspfunc2(PFX, IDX, NUM, depth)\
+    c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
+    c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
+    c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
+    c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
+    c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
+    c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
+    c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
+    c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
+    c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
+    c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
+    c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
+    c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
+    c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
+    c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
+    c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
+    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
+
+
+#define BIT_DEPTH_FUNCS(depth)\
+    c->draw_edges                    = FUNCC(draw_edges            , depth);\
+    c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
+    c->clear_block                   = FUNCC(clear_block           , depth);\
+    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
+    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
+    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
+    c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
+    c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
+\
+    c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
+    c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
+    c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
+    c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
+    c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
+    c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
+\
+    dspfunc1(put       , 0, 16, depth);\
+    dspfunc1(put       , 1,  8, depth);\
+    dspfunc1(put       , 2,  4, depth);\
+    dspfunc1(put       , 3,  2, depth);\
+    dspfunc1(put_no_rnd, 0, 16, depth);\
+    dspfunc1(put_no_rnd, 1,  8, depth);\
+    dspfunc1(avg       , 0, 16, depth);\
+    dspfunc1(avg       , 1,  8, depth);\
+    dspfunc1(avg       , 2,  4, depth);\
+    dspfunc1(avg       , 3,  2, depth);\
+    dspfunc1(avg_no_rnd, 0, 16, depth);\
+    dspfunc1(avg_no_rnd, 1,  8, depth);\
+\
+    dspfunc2(put_h264_qpel, 0, 16, depth);\
+    dspfunc2(put_h264_qpel, 1,  8, depth);\
+    dspfunc2(put_h264_qpel, 2,  4, depth);\
+    dspfunc2(put_h264_qpel, 3,  2, depth);\
+    dspfunc2(avg_h264_qpel, 0, 16, depth);\
+    dspfunc2(avg_h264_qpel, 1,  8, depth);\
+    dspfunc2(avg_h264_qpel, 2,  4, depth);
+
+    if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) {
+        BIT_DEPTH_FUNCS(8)
+    } else {
+        switch (avctx->bits_per_raw_sample) {
+            case 9:
+                BIT_DEPTH_FUNCS(9)
+                break;
+            case 10:
+                BIT_DEPTH_FUNCS(10)
+                break;
+            default:
+                av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
+                BIT_DEPTH_FUNCS(8)
+                break;
+        }
+    }
+
+
     if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
     if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
     if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 9b4aef758a..78d21527a5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -53,19 +53,24 @@ void ff_fdct_mmx(DCTELEM *block);
 void ff_fdct_mmx2(DCTELEM *block);
 void ff_fdct_sse2(DCTELEM *block);
 
-void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
-void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
-void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-
-void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul);
-void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
+#define H264_IDCT(depth) \
+void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_lowres_idct_add_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
+void ff_h264_lowres_idct_put_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
+void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
+void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
+
+H264_IDCT( 8)
+H264_IDCT( 9)
+H264_IDCT(10)
+
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
 
@@ -82,10 +87,20 @@ extern const uint8_t ff_zigzag248_direct[64];
 extern uint32_t ff_squareTbl[512];
 extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
 
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
+#define PUTAVG_PIXELS(depth)\
+void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
+
+PUTAVG_PIXELS( 8)
+PUTAVG_PIXELS( 9)
+PUTAVG_PIXELS(10)
+
+#define ff_put_pixels8x8_c ff_put_pixels8x8_8_c
+#define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c
+#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
+#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
 
 /* VP3 DSP functions */
 void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
@@ -187,10 +202,17 @@ typedef struct ScanTable{
 
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
 
-void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize,
-                         int block_w, int block_h,
+#define EMULATED_EDGE(depth) \
+void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
+                         int block_w, int block_h,\
                          int src_x, int src_y, int w, int h);
 
+EMULATED_EDGE(8)
+EMULATED_EDGE(9)
+EMULATED_EDGE(10)
+
+#define ff_emulated_edge_mc ff_emulated_edge_mc_8
+
 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
 void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
@@ -562,6 +584,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
 
 #define         BYTE_VEC32(c)   ((c)*0x01010101UL)
+#define         BYTE_VEC64(c)   ((c)*0x0001000100010001UL)
 
 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
 {
@@ -573,6 +596,16 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
     return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
 }
 
+static inline uint64_t rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+
+static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+
 static inline int get_penalty_factor(int lambda, int lambda2, int type){
     switch(type&0xFF){
     default:
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index f69c4671f9..8ca6d3e414 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -27,25 +27,55 @@
  * DSP utils
  */
 
-#include "dsputil.h"
+#include "high_bit_depth.h"
 
-#define BIT_DEPTH 8
+static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN2P(dst   , AV_RN2P(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
-#define pixel  uint8_t
-#define pixel2 uint16_t
-#define pixel4 uint32_t
-#define dctcoef int16_t
+static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN4P(dst   , AV_RN4P(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
-#define FUNC(a)  a
-#define FUNCC(a) a ## _c
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define AV_RN2P AV_RN16
-#define AV_RN4P AV_RN32
-#define PIXEL_MAX ((1<<BIT_DEPTH)-1)
+static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN4P(dst                , AV_RN4P(src                ));
+        AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
-#define no_rnd_avg_pixel4 no_rnd_avg32
-#define    rnd_avg_pixel4    rnd_avg32
+static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN4P(dst                 , AV_RN4P(src                 ));
+        AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
+        AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
+        AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
 
 /* draw the edges of width 'w' of an image of size width, height */
 //FIXME check that this is ok for mpeg4 interlaced
@@ -1317,10 +1347,22 @@ H264_MC(avg_, 16)
 #undef op2_avg
 #undef op2_put
 
-#define put_h264_qpel8_mc00_c  ff_put_pixels8x8_c
-#define avg_h264_qpel8_mc00_c  ff_avg_pixels8x8_c
-#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
-#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
+#if BIT_DEPTH == 8
+#   define put_h264_qpel8_mc00_8_c  ff_put_pixels8x8_8_c
+#   define avg_h264_qpel8_mc00_8_c  ff_avg_pixels8x8_8_c
+#   define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
+#   define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
+#elif BIT_DEPTH == 9
+#   define put_h264_qpel8_mc00_9_c  ff_put_pixels8x8_9_c
+#   define avg_h264_qpel8_mc00_9_c  ff_avg_pixels8x8_9_c
+#   define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
+#   define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
+#elif BIT_DEPTH == 10
+#   define put_h264_qpel8_mc00_10_c  ff_put_pixels8x8_10_c
+#   define avg_h264_qpel8_mc00_10_c  ff_avg_pixels8x8_10_c
+#   define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
+#   define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
+#endif
 
 void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
     FUNCC(put_pixels8)(dst, src, stride, 8);
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 0fcb7dbbdb..1388dd5c09 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -783,7 +783,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
     dst->list_counts              = src->list_counts;
 
     dst->s.obmc_scratchpad = NULL;
-    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
+    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
 }
 
 /**
@@ -811,8 +811,8 @@ static av_cold void common_init(H264Context *h){
     s->height = s->avctx->height;
     s->codec_id= s->avctx->codec->id;
 
-    ff_h264dsp_init(&h->h264dsp);
-    ff_h264_pred_init(&h->hpc, s->codec_id);
+    ff_h264dsp_init(&h->h264dsp, 8);
+    ff_h264_pred_init(&h->hpc, s->codec_id, 8);
 
     h->dequant_coeff_pps= -1;
     s->unrestricted_mv=1;
@@ -895,7 +895,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     ff_h264_decode_init_vlc();
 
     h->pixel_shift = 0;
-    h->sps.bit_depth_luma = 8;
+    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
 
     h->thread_context[0] = h;
     h->outputed_poc = INT_MIN;
@@ -2998,6 +2998,20 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
 
             if(avctx->has_b_frames < 2)
                 avctx->has_b_frames= !s->low_delay;
+
+            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
+                if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
+                    avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
+                    h->pixel_shift = h->sps.bit_depth_luma > 8;
+
+                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
+                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
+                    dsputil_init(&s->dsp, s->avctx);
+                } else {
+                    av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
+                    return -1;
+                }
+            }
             break;
         case NAL_PPS:
             init_get_bits(&s->gb, ptr, bit_length);
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 22b2086243..64f4856189 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -29,57 +29,83 @@
 #include "avcodec.h"
 #include "h264dsp.h"
 
+#define BIT_DEPTH 8
 #include "h264dsp_template.c"
+#undef BIT_DEPTH
 
-void ff_h264dsp_init(H264DSPContext *c)
-{
-    c->h264_idct_add= ff_h264_idct_add_c;
-    c->h264_idct8_add= ff_h264_idct8_add_c;
-    c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
-    c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
-    c->h264_idct_add16     = ff_h264_idct_add16_c;
-    c->h264_idct8_add4     = ff_h264_idct8_add4_c;
-    c->h264_idct_add8      = ff_h264_idct_add8_c;
-    c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
-    c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c;
-    c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c;
+#define BIT_DEPTH 9
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
 
-    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
-    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
-    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
-    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
-    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
-    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
-    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
-    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
-    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
-    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
-    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
-    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
-    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
-    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
-    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
-    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
-    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
-    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
-    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
-    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
+{
+#undef FUNC
+#define FUNC(a, depth) a ## _ ## depth ## _c
 
-    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
-    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
-    c->h264_h_loop_filter_luma_mbaff= h264_h_loop_filter_luma_mbaff_c;
-    c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
-    c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
-    c->h264_h_loop_filter_luma_mbaff_intra= h264_h_loop_filter_luma_mbaff_intra_c;
-    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
-    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
-    c->h264_h_loop_filter_chroma_mbaff= h264_h_loop_filter_chroma_mbaff_c;
-    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
-    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
-    c->h264_h_loop_filter_chroma_mbaff_intra= h264_h_loop_filter_chroma_mbaff_intra_c;
+#define H264_DSP(depth) \
+    c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
+    c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
+    c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
+    c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
+    c->h264_idct_add16     = FUNC(ff_h264_idct_add16, depth);\
+    c->h264_idct8_add4     = FUNC(ff_h264_idct8_add4, depth);\
+    c->h264_idct_add8      = FUNC(ff_h264_idct_add8, depth);\
+    c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
+    c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
+    c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
+\
+    c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
+    c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
+    c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
+    c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
+    c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
+    c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
+    c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
+    c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
+    c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
+    c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
+    c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
+    c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
+    c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
+    c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
+    c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
+    c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
+    c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
+    c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
+    c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
+    c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
+\
+    c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
+    c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
+    c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\
+    c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\
+    c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
+    c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
+    c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
+    c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
+    c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
+    c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
+    c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
+    c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
     c->h264_loop_filter_strength= NULL;
 
-    if (ARCH_ARM) ff_h264dsp_init_arm(c);
-    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c);
-    if (HAVE_MMX) ff_h264dsp_init_x86(c);
+    switch (bit_depth) {
+    case 9:
+        H264_DSP(9);
+        break;
+    case 10:
+        H264_DSP(10);
+        break;
+    default:
+        H264_DSP(8);
+        break;
+    }
+
+    if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth);
+    if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth);
+    if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth);
 }
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 7eb50be342..87a1dd9722 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -75,9 +75,9 @@ typedef struct H264DSPContext{
     void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
 }H264DSPContext;
 
-void ff_h264dsp_init(H264DSPContext *c);
-void ff_h264dsp_init_arm(H264DSPContext *c);
-void ff_h264dsp_init_ppc(H264DSPContext *c);
-void ff_h264dsp_init_x86(H264DSPContext *c);
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth);
 
 #endif /* AVCODEC_H264DSP_H */
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index 7190f4d9ca..91162ea900 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -25,10 +25,7 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#define BIT_DEPTH 8
-#define pixel uint8_t
-#define av_clip_pixel av_clip_uint8
-#define FUNCC(a) a ## _c
+#include "high_bit_depth.h"
 
 #define op_scale1(x)  block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
 #define op_scale2(x)  dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index d79777571a..1634a00835 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -25,4 +25,14 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
+#define BIT_DEPTH 8
 #include "h264idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "h264idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264idct_template.c"
+#undef BIT_DEPTH
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index ed1f18c9b6..39c9a1c9eb 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -25,7 +25,7 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#include "dsputil.h"
+#include "high_bit_depth.h"
 
 #ifndef AVCODEC_H264IDCT_INTERNAL_H
 #define AVCODEC_H264IDCT_INTERNAL_H
@@ -42,12 +42,6 @@ static const uint8_t scan8[16 + 2*4]={
 };
 #endif
 
-#define pixel  uint8_t
-#define dctcoef DCTELEM
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define FUNCC(a) a ## _c
-
 static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
     int i;
     INIT_CLIP
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 616a7a93b1..b3701ef3b8 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -26,7 +26,18 @@
  */
 
 #include "h264pred.h"
+
+#define BIT_DEPTH 8
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
 #include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264pred_template.c"
+#undef BIT_DEPTH
 
 static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
     const int lt= src[-1-1*stride];
@@ -245,11 +256,11 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
 }
 
 static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
-    pred16x16_plane_compat_c(src, stride, 1, 0);
+    pred16x16_plane_compat_8_c(src, stride, 1, 0);
 }
 
 static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
-    pred16x16_plane_compat_c(src, stride, 0, 1);
+    pred16x16_plane_compat_8_c(src, stride, 0, 1);
 }
 
 static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
@@ -352,130 +363,149 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
 /**
  * Set the intra prediction function pointers.
  */
-void ff_h264_pred_init(H264PredContext *h, int codec_id){
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
 //    MpegEncContext * const s = &h->s;
 
-    if(codec_id != CODEC_ID_RV40){
-        if(codec_id == CODEC_ID_VP8) {
-            h->pred4x4[VERT_PRED       ]= pred4x4_vertical_vp8_c;
-            h->pred4x4[HOR_PRED        ]= pred4x4_horizontal_vp8_c;
-        } else {
-            h->pred4x4[VERT_PRED       ]= pred4x4_vertical_c;
-            h->pred4x4[HOR_PRED        ]= pred4x4_horizontal_c;
-        }
-        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
-        if(codec_id == CODEC_ID_SVQ3)
-            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c;
-        else
-            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
-        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
-        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
-        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred4x4[VERT_LEFT_PRED  ]= pred4x4_vertical_left_vp8_c;
-        } else
-            h->pred4x4[VERT_LEFT_PRED  ]= pred4x4_vertical_left_c;
-        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
-        if(codec_id != CODEC_ID_VP8) {
-            h->pred4x4[LEFT_DC_PRED    ]= pred4x4_left_dc_c;
-            h->pred4x4[TOP_DC_PRED     ]= pred4x4_top_dc_c;
-            h->pred4x4[DC_128_PRED     ]= pred4x4_128_dc_c;
-        } else {
-            h->pred4x4[TM_VP8_PRED     ]= pred4x4_tm_vp8_c;
-            h->pred4x4[DC_127_PRED     ]= pred4x4_127_dc_c;
-            h->pred4x4[DC_129_PRED     ]= pred4x4_129_dc_c;
-            h->pred4x4[VERT_VP8_PRED   ]= pred4x4_vertical_c;
-            h->pred4x4[HOR_VP8_PRED    ]= pred4x4_horizontal_c;
-        }
-    }else{
-        h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
-        h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
-        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
-        h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c;
-        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
-        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
-        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
-        h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_rv40_c;
-        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_rv40_c;
-        h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
-        h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
-        h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
-        h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c;
-        h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c;
-        h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c;
+#undef FUNC
+#undef FUNCC
+#define FUNC(a, depth) a ## _ ## depth
+#define FUNCC(a, depth) a ## _ ## depth ## _c
+#define FUNCD(a) a ## _c
+
+#define H264_PRED(depth) \
+    if(codec_id != CODEC_ID_RV40){\
+        if(codec_id == CODEC_ID_VP8) {\
+            h->pred4x4[VERT_PRED       ]= FUNCD(pred4x4_vertical_vp8);\
+            h->pred4x4[HOR_PRED        ]= FUNCD(pred4x4_horizontal_vp8);\
+        } else {\
+            h->pred4x4[VERT_PRED       ]= FUNCC(pred4x4_vertical          , depth);\
+            h->pred4x4[HOR_PRED        ]= FUNCC(pred4x4_horizontal        , depth);\
+        }\
+        h->pred4x4[DC_PRED             ]= FUNCC(pred4x4_dc                , depth);\
+        if(codec_id == CODEC_ID_SVQ3)\
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
+        else\
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left     , depth);\
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right        , depth);\
+        h->pred4x4[VERT_RIGHT_PRED     ]= FUNCC(pred4x4_vertical_right    , depth);\
+        h->pred4x4[HOR_DOWN_PRED       ]= FUNCC(pred4x4_horizontal_down   , depth);\
+        if (codec_id == CODEC_ID_VP8) {\
+            h->pred4x4[VERT_LEFT_PRED  ]= FUNCD(pred4x4_vertical_left_vp8);\
+        } else\
+            h->pred4x4[VERT_LEFT_PRED  ]= FUNCC(pred4x4_vertical_left     , depth);\
+        h->pred4x4[HOR_UP_PRED         ]= FUNCC(pred4x4_horizontal_up     , depth);\
+        if(codec_id != CODEC_ID_VP8) {\
+            h->pred4x4[LEFT_DC_PRED    ]= FUNCC(pred4x4_left_dc           , depth);\
+            h->pred4x4[TOP_DC_PRED     ]= FUNCC(pred4x4_top_dc            , depth);\
+            h->pred4x4[DC_128_PRED     ]= FUNCC(pred4x4_128_dc            , depth);\
+        } else {\
+            h->pred4x4[TM_VP8_PRED     ]= FUNCD(pred4x4_tm_vp8);\
+            h->pred4x4[DC_127_PRED     ]= FUNCC(pred4x4_127_dc            , depth);\
+            h->pred4x4[DC_129_PRED     ]= FUNCC(pred4x4_129_dc            , depth);\
+            h->pred4x4[VERT_VP8_PRED   ]= FUNCC(pred4x4_vertical          , depth);\
+            h->pred4x4[HOR_VP8_PRED    ]= FUNCC(pred4x4_horizontal        , depth);\
+        }\
+    }else{\
+        h->pred4x4[VERT_PRED           ]= FUNCC(pred4x4_vertical          , depth);\
+        h->pred4x4[HOR_PRED            ]= FUNCC(pred4x4_horizontal        , depth);\
+        h->pred4x4[DC_PRED             ]= FUNCC(pred4x4_dc                , depth);\
+        h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right        , depth);\
+        h->pred4x4[VERT_RIGHT_PRED     ]= FUNCC(pred4x4_vertical_right    , depth);\
+        h->pred4x4[HOR_DOWN_PRED       ]= FUNCC(pred4x4_horizontal_down   , depth);\
+        h->pred4x4[VERT_LEFT_PRED      ]= FUNCD(pred4x4_vertical_left_rv40);\
+        h->pred4x4[HOR_UP_PRED         ]= FUNCD(pred4x4_horizontal_up_rv40);\
+        h->pred4x4[LEFT_DC_PRED        ]= FUNCC(pred4x4_left_dc           , depth);\
+        h->pred4x4[TOP_DC_PRED         ]= FUNCC(pred4x4_top_dc            , depth);\
+        h->pred4x4[DC_128_PRED         ]= FUNCC(pred4x4_128_dc            , depth);\
+        h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\
+        h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\
+        h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\
+    }\
+\
+    h->pred8x8l[VERT_PRED           ]= FUNCC(pred8x8l_vertical            , depth);\
+    h->pred8x8l[HOR_PRED            ]= FUNCC(pred8x8l_horizontal          , depth);\
+    h->pred8x8l[DC_PRED             ]= FUNCC(pred8x8l_dc                  , depth);\
+    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left           , depth);\
+    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right          , depth);\
+    h->pred8x8l[VERT_RIGHT_PRED     ]= FUNCC(pred8x8l_vertical_right      , depth);\
+    h->pred8x8l[HOR_DOWN_PRED       ]= FUNCC(pred8x8l_horizontal_down     , depth);\
+    h->pred8x8l[VERT_LEFT_PRED      ]= FUNCC(pred8x8l_vertical_left       , depth);\
+    h->pred8x8l[HOR_UP_PRED         ]= FUNCC(pred8x8l_horizontal_up       , depth);\
+    h->pred8x8l[LEFT_DC_PRED        ]= FUNCC(pred8x8l_left_dc             , depth);\
+    h->pred8x8l[TOP_DC_PRED         ]= FUNCC(pred8x8l_top_dc              , depth);\
+    h->pred8x8l[DC_128_PRED         ]= FUNCC(pred8x8l_128_dc              , depth);\
+\
+    h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical                   , depth);\
+    h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal                 , depth);\
+    if (codec_id != CODEC_ID_VP8) {\
+        h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                    , depth);\
+    } else\
+        h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
+    if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
+        h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
+        h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
+        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
+        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
+        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
+        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
+        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
+    }else{\
+        h->pred8x8[DC_PRED8x8     ]= FUNCD(pred8x8_dc_rv40);\
+        h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
+        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
+        if (codec_id == CODEC_ID_VP8) {\
+            h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc              , depth);\
+            h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc              , depth);\
+        }\
+    }\
+    h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                     , depth);\
+\
+    h->pred16x16[DC_PRED8x8     ]= FUNCC(pred16x16_dc                     , depth);\
+    h->pred16x16[VERT_PRED8x8   ]= FUNCC(pred16x16_vertical               , depth);\
+    h->pred16x16[HOR_PRED8x8    ]= FUNCC(pred16x16_horizontal             , depth);\
+    switch(codec_id){\
+    case CODEC_ID_SVQ3:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_svq3);\
+       break;\
+    case CODEC_ID_RV40:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_rv40);\
+       break;\
+    case CODEC_ID_VP8:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_tm_vp8);\
+       h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc               , depth);\
+       h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc               , depth);\
+       break;\
+    default:\
+       h->pred16x16[PLANE_PRED8x8  ]= FUNCC(pred16x16_plane               , depth);\
+       break;\
+    }\
+    h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc                , depth);\
+    h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc                 , depth);\
+    h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc                 , depth);\
+\
+    /* special lossless h/v prediction for h264 */ \
+    h->pred4x4_add  [VERT_PRED   ]= FUNCC(pred4x4_vertical_add            , depth);\
+    h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add          , depth);\
+    h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add           , depth);\
+    h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add         , depth);\
+    h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add            , depth);\
+    h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add          , depth);\
+    h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add          , depth);\
+    h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add        , depth);\
+
+    switch (bit_depth) {
+        case 9:
+            H264_PRED(9)
+            break;
+        case 10:
+            H264_PRED(10)
+            break;
+        default:
+            H264_PRED(8)
+            break;
     }
 
-    h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
-    h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
-    h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
-    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
-    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
-    h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
-    h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
-    h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
-    h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
-    h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
-    h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
-    h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
-
-    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
-    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
-    if (codec_id != CODEC_ID_VP8) {
-        h->pred8x8[PLANE_PRED8x8]= pred8x8_plane_c;
-    } else
-        h->pred8x8[PLANE_PRED8x8]= pred8x8_tm_vp8_c;
-    if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){
-        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
-        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
-        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
-        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t;
-        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt;
-        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00;
-        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0;
-    }else{
-        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_rv40_c;
-        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c;
-        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c;
-        if (codec_id == CODEC_ID_VP8) {
-            h->pred8x8[DC_127_PRED8x8]= pred8x8_127_dc_c;
-            h->pred8x8[DC_129_PRED8x8]= pred8x8_129_dc_c;
-        }
-    }
-    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
-
-    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
-    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
-    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
-    switch(codec_id){
-    case CODEC_ID_SVQ3:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_svq3_c;
-       break;
-    case CODEC_ID_RV40:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_rv40_c;
-       break;
-    case CODEC_ID_VP8:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_tm_vp8_c;
-       h->pred16x16[DC_127_PRED8x8]= pred16x16_127_dc_c;
-       h->pred16x16[DC_129_PRED8x8]= pred16x16_129_dc_c;
-       break;
-    default:
-       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
-       break;
-    }
-    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
-    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
-    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
-
-    //special lossless h/v prediction for h264
-    h->pred4x4_add  [VERT_PRED   ]= pred4x4_vertical_add_c;
-    h->pred4x4_add  [ HOR_PRED   ]= pred4x4_horizontal_add_c;
-    h->pred8x8l_add [VERT_PRED   ]= pred8x8l_vertical_add_c;
-    h->pred8x8l_add [ HOR_PRED   ]= pred8x8l_horizontal_add_c;
-    h->pred8x8_add  [VERT_PRED8x8]= pred8x8_vertical_add_c;
-    h->pred8x8_add  [ HOR_PRED8x8]= pred8x8_horizontal_add_c;
-    h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c;
-    h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c;
-
-    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id);
-    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id);
+    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
+    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);
 }
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index db3f5809f8..34b1e90bbc 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -101,8 +101,8 @@ typedef struct H264PredContext{
     void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
 }H264PredContext;
 
-void ff_h264_pred_init(H264PredContext *h, int codec_id);
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id);
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id);
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth);
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth);
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth);
 
 #endif /* AVCODEC_H264PRED_H */
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 4bd26e2892..066e837cdf 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -26,21 +26,7 @@
  */
 
 #include "mathops.h"
-#include "dsputil.h"
-
-#define BIT_DEPTH 8
-
-#define pixel uint8_t
-#define pixel4 uint32_t
-#define dctcoef DCTELEM
-
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define FUNC(a) a
-#define FUNCC(a) a ## _c
-#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
-#define AV_WN4P  AV_WN32
-#define AV_WN4PA AV_WN32A
+#include "high_bit_depth.h"
 
 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
diff --git a/libavcodec/high_bit_depth.h b/libavcodec/high_bit_depth.h
new file mode 100644
index 0000000000..6f2b6a74f4
--- /dev/null
+++ b/libavcodec/high_bit_depth.h
@@ -0,0 +1,85 @@
+#include "dsputil.h"
+
+#ifndef BIT_DEPTH
+#define BIT_DEPTH 8
+#endif
+
+#ifdef AVCODEC_H264_HIGH_DEPTH_H
+#   undef pixel
+#   undef pixel2
+#   undef pixel4
+#   undef dctcoef
+#   undef INIT_CLIP
+#   undef no_rnd_avg_pixel4
+#   undef rnd_avg_pixel4
+#   undef AV_RN2P
+#   undef AV_RN4P
+#   undef AV_WN2P
+#   undef AV_WN4P
+#   undef AV_WN4PA
+#   undef CLIP
+#   undef FUNC
+#   undef FUNCC
+#   undef av_clip_pixel
+#   undef PIXEL_SPLAT_X4
+#else
+#   define AVCODEC_H264_HIGH_DEPTH_H
+#   define CLIP_PIXEL(depth)\
+    static inline uint16_t av_clip_pixel_ ## depth (int p)\
+    {\
+        const int pixel_max = (1 << depth)-1;\
+        return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\
+    }
+
+CLIP_PIXEL( 9)
+CLIP_PIXEL(10)
+#endif
+
+#if BIT_DEPTH > 8
+#   define pixel  uint16_t
+#   define pixel2 uint32_t
+#   define pixel4 uint64_t
+#   define dctcoef int32_t
+
+#   define INIT_CLIP
+#   define no_rnd_avg_pixel4 no_rnd_avg64
+#   define    rnd_avg_pixel4    rnd_avg64
+#   define AV_RN2P  AV_RN32
+#   define AV_RN4P  AV_RN64
+#   define AV_WN2P  AV_WN32
+#   define AV_WN4P  AV_WN64
+#   define AV_WN4PA AV_WN64A
+#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
+#else
+#   define pixel  uint8_t
+#   define pixel2 uint16_t
+#   define pixel4 uint32_t
+#   define dctcoef int16_t
+
+#   define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+#   define no_rnd_avg_pixel4 no_rnd_avg32
+#   define    rnd_avg_pixel4    rnd_avg32
+#   define AV_RN2P  AV_RN16
+#   define AV_RN4P  AV_RN32
+#   define AV_WN2P  AV_WN16
+#   define AV_WN4P  AV_WN32
+#   define AV_WN4PA AV_WN32A
+#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
+#endif
+
+#if BIT_DEPTH == 8
+#   define av_clip_pixel(a) av_clip_uint8(a)
+#   define CLIP(a) cm[a]
+#   define FUNC(a)  a ## _8
+#   define FUNCC(a) a ## _8_c
+#elif BIT_DEPTH == 9
+#   define av_clip_pixel(a) av_clip_pixel_9(a)
+#   define CLIP(a)          av_clip_pixel_9(a)
+#   define FUNC(a)  a ## _9
+#   define FUNCC(a) a ## _9_c
+#elif BIT_DEPTH == 10
+#   define av_clip_pixel(a) av_clip_pixel_10(a)
+#   define CLIP(a)          av_clip_pixel_10(a)
+#   define FUNC(a)  a ## _10
+#   define FUNCC(a) a ## _10_c
+#endif
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
index 9e49c91ddf..c0f2c036c6 100644
--- a/libavcodec/mlib/dsputil_mlib.c
+++ b/libavcodec/mlib/dsputil_mlib.c
@@ -421,10 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data)
 
 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     c->get_pixels  = get_pixels_mlib;
     c->diff_pixels = diff_pixels_mlib;
     c->add_pixels_clamped = add_pixels_clamped_mlib;
 
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = put_pixels16_mlib;
     c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
     c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
@@ -445,6 +448,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
 
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
     c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
+    }
 
     c->bswap_buf = bswap_buf_mlib;
 }
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 9111d036a5..adce61b277 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1384,6 +1384,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
 
 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     c->pix_abs[0][1] = sad16_x2_altivec;
     c->pix_abs[0][2] = sad16_y2_altivec;
     c->pix_abs[0][3] = sad16_xy2_altivec;
@@ -1397,8 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
     c->pix_sum = pix_sum_altivec;
     c->diff_pixels = diff_pixels_altivec;
     c->get_pixels = get_pixels_altivec;
+    if (!high_bit_depth)
     c->clear_block = clear_block_altivec;
     c->add_bytes= add_bytes_altivec;
+    if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = put_pixels16_altivec;
     /* the two functions do the same thing, so use the same code */
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
@@ -1409,6 +1413,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
     c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
     c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
+    }
 
     c->hadamard8_diff[0] = hadamard8_diff16_altivec;
     c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index c52ea61805..5f131f3196 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -153,8 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h)
 
 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
     // Common optimizations whether AltiVec is available or not
     c->prefetch = prefetch_ppc;
+    if (!high_bit_depth) {
     switch (check_dcbzl_effect()) {
         case 32:
             c->clear_blocks = clear_blocks_dcbz32_ppc;
@@ -165,6 +168,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
         default:
             break;
     }
+    }
 
 #if HAVE_ALTIVEC
     if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 02699beef4..fae0674720 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -965,8 +965,10 @@ H264_WEIGHT( 8, 8)
 H264_WEIGHT( 8, 4)
 
 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+    if (!high_bit_depth) {
         c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
         c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
 
@@ -992,11 +994,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
         dspfunc(avg_h264_qpel, 0, 16);
 #undef dspfunc
     }
+    }
 }
 
-void ff_h264dsp_init_ppc(H264DSPContext *c)
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth)
 {
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+    if (bit_depth == 8) {
         c->h264_idct_add = ff_h264_idct_add_altivec;
         c->h264_idct_add8 = ff_h264_idct_add8_altivec;
         c->h264_idct_add16 = ff_h264_idct_add16_altivec;
@@ -1019,4 +1023,5 @@ void ff_h264dsp_init_ppc(H264DSPContext *c)
         c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
         c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
     }
+    }
 }
diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c
index b6096b3b4a..f4503a9030 100644
--- a/libavcodec/ps2/dsputil_mmi.c
+++ b/libavcodec/ps2/dsputil_mmi.c
@@ -142,7 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
 {
     const int idct_algo= avctx->idct_algo;
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
+    if (!high_bit_depth) {
     c->clear_blocks = clear_blocks_mmi;
 
     c->put_pixels_tab[1][0] = put_pixels8_mmi;
@@ -150,6 +152,7 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
 
     c->put_pixels_tab[0][0] = put_pixels16_mmi;
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
+    }
 
     c->get_pixels = get_pixels_mmi;
 
diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index f3ab979976..b5d314cf0f 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1384,7 +1384,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
     if (MPV_common_init(s) < 0)
         return -1;
 
-    ff_h264_pred_init(&r->h, CODEC_ID_RV40);
+    ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
 
     r->intra_types_stride = 4*s->mb_stride + 4;
     r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 7a8d60d73e..db40ece670 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -333,6 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
 
 void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
 {
+        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
+        if (!high_bit_depth) {
         c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
         c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
         c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
@@ -368,6 +371,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
         c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
         c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
         c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
+        }
 
 #ifdef QPEL
 
@@ -401,20 +405,24 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
     dspfunc(avg_qpel, 1, 8);
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
 
+    if (!high_bit_depth) {
     dspfunc(put_h264_qpel, 0, 16);
     dspfunc(put_h264_qpel, 1, 8);
     dspfunc(put_h264_qpel, 2, 4);
     dspfunc(avg_h264_qpel, 0, 16);
     dspfunc(avg_h264_qpel, 1, 8);
     dspfunc(avg_h264_qpel, 2, 4);
+    }
 
 #undef dspfunc
+    if (!high_bit_depth) {
     c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
     c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
     c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
     c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
     c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
     c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
+    }
 
     c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index 0c724c346d..9ea48ad4a1 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -92,8 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
 {
         const int idct_algo= avctx->idct_algo;
+        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
         dsputil_init_align(c,avctx);
 
+        if (!high_bit_depth)
         c->clear_blocks = clear_blocks_sh4;
         if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
                 c->idct_put = idct_put;
diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c
index baf555b7f5..ab9258b2b9 100644
--- a/libavcodec/sparc/dsputil_vis.c
+++ b/libavcodec/sparc/dsputil_vis.c
@@ -3953,6 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
 {
   /* VIS-specific optimizations */
   int accel = vis_level ();
+  const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
   if (accel & ACCEL_SPARC_VIS) {
       if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){
@@ -3962,6 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
           c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
       }
 
+      if (!high_bit_depth) {
       c->put_pixels_tab[0][0] = MC_put_o_16_vis;
       c->put_pixels_tab[0][1] = MC_put_x_16_vis;
       c->put_pixels_tab[0][2] = MC_put_y_16_vis;
@@ -4001,5 +4003,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
       c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
       c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
       c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis;
+      }
   }
 }
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index ea0e911d0b..dc7eb2121a 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1698,7 +1698,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
     avctx->pix_fmt = PIX_FMT_YUV420P;
 
     dsputil_init(&s->dsp, avctx);
-    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
+    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
     ff_vp8dsp_init(&s->vp8dsp);
 
     return 0;
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index f98e6ae0fa..a0cb11aa40 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
+    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
     if (avctx->dsp_mask) {
         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
         c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
+        if (!high_bit_depth) {
         c->clear_block  = clear_block_mmx;
         c->clear_blocks = clear_blocks_mmx;
         if ((mm_flags & AV_CPU_FLAG_SSE) &&
@@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->clear_block  = clear_block_sse;
             c->clear_blocks = clear_blocks_sse;
         }
+        }
 
 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
         c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
@@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
         c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
 
+        if (!high_bit_depth) {
         SET_HPEL_FUNCS(put, 0, 16, mmx);
         SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
         SET_HPEL_FUNCS(avg, 0, 16, mmx);
@@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
         SET_HPEL_FUNCS(avg, 1, 8, mmx);
         SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
+        }
 
 #if ARCH_X86_32 || !HAVE_YASM
         c->gmc= gmc_mmx;
 #endif
 #if ARCH_X86_32 && HAVE_YASM
+        if (!high_bit_depth)
         c->emulated_edge_mc = emulated_edge_mc_mmx;
 #endif
 
         c->add_bytes= add_bytes_mmx;
         c->add_bytes_l2= add_bytes_l2_mmx;
 
+        if (!high_bit_depth)
         c->draw_edges = draw_edges_mmx;
 
         if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
@@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         }
 
 #if HAVE_YASM
+        if (!high_bit_depth) {
         c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
         c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
+        }
 
         c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
         c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
@@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         if (mm_flags & AV_CPU_FLAG_MMX2) {
             c->prefetch = prefetch_mmx2;
 
+            if (!high_bit_depth) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
             c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
 
@@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
             c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
             c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+            }
 
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+                if (!high_bit_depth) {
                 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
                 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
                 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+                }
 
                 if (CONFIG_VP3_DECODER && HAVE_YASM) {
                     c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
@@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
 
+            if (!high_bit_depth) {
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
             SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+            }
 
             SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
@@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
 
+            if (!high_bit_depth) {
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
             c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
             c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
+            }
 
             c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
 #endif
@@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
             c->prefetch = prefetch_3dnow;
 
+            if (!high_bit_depth) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
             c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
 
@@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
             }
+            }
 
             if (CONFIG_VP3_DECODER
                 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
@@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
 
+            if (!high_bit_depth) {
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
             SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+            }
 
             SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
@@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
 
 #if HAVE_YASM
+            if (!high_bit_depth) {
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
+            }
 
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
@@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
         if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
             // these functions are slower than mmx on AMD, but faster on Intel
+            if (!high_bit_depth) {
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
             c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
             H264_QPEL_FUNCS(0, 0, sse2);
+            }
         }
         if(mm_flags & AV_CPU_FLAG_SSE2){
+            if (!high_bit_depth) {
             H264_QPEL_FUNCS(0, 1, sse2);
             H264_QPEL_FUNCS(0, 2, sse2);
             H264_QPEL_FUNCS(0, 3, sse2);
@@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 1, sse2);
             H264_QPEL_FUNCS(3, 2, sse2);
             H264_QPEL_FUNCS(3, 3, sse2);
+            }
         }
 #if HAVE_SSSE3
         if(mm_flags & AV_CPU_FLAG_SSSE3){
+            if (!high_bit_depth) {
             H264_QPEL_FUNCS(1, 0, ssse3);
             H264_QPEL_FUNCS(1, 1, ssse3);
             H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 1, ssse3);
             H264_QPEL_FUNCS(3, 2, ssse3);
             H264_QPEL_FUNCS(3, 3, ssse3);
+            }
             c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
 #if HAVE_YASM
+            if (!high_bit_depth) {
             c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
             c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
+            }
             c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
             if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
                 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
@@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 }
             }
 
+            if (!high_bit_depth)
             c->emulated_edge_mc = emulated_edge_mc_sse;
             c->gmc= gmc_sse;
 #endif
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 4142cc1edd..9eb752581b 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -95,9 +95,13 @@ void ff_pred4x4_tm_vp8_mmxext      (uint8_t *src, const uint8_t *topright, int s
 void ff_pred4x4_tm_vp8_ssse3       (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
 
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
 {
     int mm_flags = av_get_cpu_flags();
+    const int high_depth = bit_depth > 8;
+
+    if (high_depth)
+        return;
 
 #if HAVE_YASM
     if (mm_flags & AV_CPU_FLAG_MMX) {
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 9f004a5314..b4936a618e 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -285,10 +285,11 @@ H264_BIWEIGHT_MMX    ( 4,  8)
 H264_BIWEIGHT_MMX    ( 4,  4)
 H264_BIWEIGHT_MMX    ( 4,  2)
 
-void ff_h264dsp_init_x86(H264DSPContext *c)
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
 {
     int mm_flags = av_get_cpu_flags();
 
+    if (bit_depth == 8) {
     if (mm_flags & AV_CPU_FLAG_MMX2) {
         c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
     }
@@ -378,5 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c)
             }
         }
     }
+    }
 #endif
 }
-- 
cgit v1.2.3


From d545cf804c4a246285d75aae48741da5d2344993 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Tue, 29 Mar 2011 17:49:00 +0200
Subject: Enable decoding of high bit depth h264.

This patch completes the high bit depth h264 decoding support.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/h264.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'libavcodec')

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 1388dd5c09..616d0a4804 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1930,12 +1930,23 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
                       h->sps.num_units_in_tick, den, 1<<30);
         }
+
+        switch (h->sps.bit_depth_luma) {
+            case 9 :
+                s->avctx->pix_fmt = PIX_FMT_YUV420P9;
+                break;
+            case 10 :
+                s->avctx->pix_fmt = PIX_FMT_YUV420P10;
+                break;
+            default:
         s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
                                                  s->avctx->codec->pix_fmts ?
                                                  s->avctx->codec->pix_fmts :
                                                  s->avctx->color_range == AVCOL_RANGE_JPEG ?
                                                  hwaccel_pixfmt_list_h264_jpeg_420 :
                                                  ff_hwaccel_pixfmt_list_420);
+        }
+
         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
 
         if (MPV_common_init(s) < 0)
-- 
cgit v1.2.3


From e86fbe1751772a3b6790652ab382dcebaeec3aa8 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 6 May 2011 16:31:26 -0400
Subject: h264: do not print "too many references" warning for intra-only.

Fixes issue 2679.
---
 libavcodec/h264_refs.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c
index 13d43e6ebc..a025f7d352 100644
--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c
@@ -621,15 +621,17 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         }
     }
 
-    if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
+    if (h->long_ref_count + h->short_ref_count -
+            (h->short_ref[0] == s->current_picture_ptr) > h->sps.ref_frame_count){
 
         /* We have too many reference frames, probably due to corrupted
          * stream. Need to discard one frame. Prevents overrun of the
          * short_ref and long_ref buffers.
          */
         av_log(h->s.avctx, AV_LOG_ERROR,
-               "number of reference frames exceeds max (probably "
-               "corrupt input), discarding one\n");
+               "number of reference frames (%d+%d) exceeds max (%d; probably "
+               "corrupt input), discarding one\n",
+               h->long_ref_count, h->short_ref_count, h->sps.ref_frame_count);
 
         if (h->long_ref_count && !h->short_ref_count) {
             for (i = 0; i < 16; ++i)
-- 
cgit v1.2.3


From 86b29553f86471719adfb4ba86eec0a82f1eb347 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 10 May 2011 08:39:38 -0400
Subject: h264dsp_mmx: place bracket outside #if/#endif block.

Should fix compile on systems missing yasm/nasm.
---
 libavcodec/x86/h264dsp_mmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'libavcodec')

diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index b4936a618e..3a783a39ab 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -379,6 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
             }
         }
     }
-    }
 #endif
+    }
 }
-- 
cgit v1.2.3


From b27b54de31af2ad449291514e790aacde650ded2 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 10 May 2011 08:44:49 -0400
Subject: arm/h264pred: add missing argument type.

---
 libavcodec/arm/h264pred_init_arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'libavcodec')

diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index cae32d7567..e96f339a55 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b
         h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
 }
 
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, bit_depth)
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth)
 {
     if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth);
 }
-- 
cgit v1.2.3


From b66752790a94820c23b0ac994d6190dd9048582d Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sat, 30 Apr 2011 07:43:26 +0200
Subject: AVOptions: make default_val a union, as proposed in AVOption2.

This breaks API and ABI.
---
 libavcodec/ac3enc.c      |  70 ++---
 libavcodec/dnxhdenc.c    |   2 +-
 libavcodec/libvorbis.c   |   2 +-
 libavcodec/options.c     | 764 +++++++++++++++++++++++------------------------
 libavfilter/vsrc_movie.c |  12 +-
 libavformat/http.c       |   2 +-
 libavformat/mp3enc.c     |   2 +-
 libavformat/mpegtsenc.c  |  10 +-
 libavformat/options.c    |  34 +--
 libavformat/spdifenc.c   |   8 +-
 libavutil/opt.c          |  14 +-
 libavutil/opt.h          |  55 +---
 libswscale/options.c     |  66 ++--
 13 files changed, 497 insertions(+), 544 deletions(-)

(limited to 'libavcodec')

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index a35ff29d1a..860208d493 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -227,46 +227,46 @@ static const float extmixlev_options[EXTMIXLEV_NUM_OPTIONS] = {
 
 static const AVOption options[] = {
 /* Metadata Options */
-{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, 0, 0, 1, AC3ENC_PARAM},
+{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
 /* downmix levels */
-{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, LEVEL_MINUS_4POINT5DB, 0.0, 1.0, AC3ENC_PARAM},
-{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, LEVEL_MINUS_6DB, 0.0, 1.0, AC3ENC_PARAM},
+{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM},
+{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM},
 /* audio production information */
-{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, -1, -1, 111, AC3ENC_PARAM},
-{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, -1, -1, 2, AC3ENC_PARAM, "room_type"},
-    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, 0, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-    {"large",        "Large Room",              0, FF_OPT_TYPE_CONST, 1, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-    {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, 2, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM},
+{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"large",        "Large Room",              0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
 /* other metadata options */
-{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, 0, 0, 1, AC3ENC_PARAM},
-{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, -31, -31, -1, AC3ENC_PARAM},
-{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, 0, 0, 2, AC3ENC_PARAM, "dsur_mode"},
-    {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, 0, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-    {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, 1, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-    {"off",          "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, 2, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT, 1, 0, 1, AC3ENC_PARAM},
+{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM},
+{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"},
+    {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"off",          "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT,   {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
 /* extended bitstream information */
-{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, -1, -1, 2, AC3ENC_PARAM, "dmix_mode"},
-    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, 0, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-    {"ltrt", "Lt/Rt Downmix Preferred",         0, FF_OPT_TYPE_CONST, 1, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-    {"loro", "Lo/Ro Downmix Preferred",         0, FF_OPT_TYPE_CONST, 2, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, -1.0, -1.0, 2.0, AC3ENC_PARAM},
-{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, -1.0, -1.0, 2.0, AC3ENC_PARAM},
-{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, -1.0, -1.0, 2.0, AC3ENC_PARAM},
-{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, -1.0, -1.0, 2.0, AC3ENC_PARAM},
-{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, -1, -1, 2, AC3ENC_PARAM, "dsurex_mode"},
-    {"notindicated", "Not Indicated (default)",       0, FF_OPT_TYPE_CONST, 0, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-    {"on",           "Dolby Surround EX Encoded",     0, FF_OPT_TYPE_CONST, 1, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-    {"off",          "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, 2, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, -1, -1, 2, AC3ENC_PARAM, "dheadphone_mode"},
-    {"notindicated", "Not Indicated (default)",     0, FF_OPT_TYPE_CONST, 0, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-    {"on",           "Dolby Headphone Encoded",     0, FF_OPT_TYPE_CONST, 1, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-    {"off",          "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, 2, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, -1, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
-    {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, 0, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
-    {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, 1, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"ltrt", "Lt/Rt Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"loro", "Lo/Ro Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"},
+    {"notindicated", "Not Indicated (default)",       0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"on",           "Dolby Surround EX Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"off",          "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"},
+    {"notindicated", "Not Indicated (default)",     0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"on",           "Dolby Headphone Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"off",          "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
+    {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+    {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
 /* Other Encoding Options */
-{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, 1, 0, 1, AC3ENC_PARAM},
+{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
 {NULL}
 };
 
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 36ef5eeb51..e00b6f67a3 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -33,7 +33,7 @@
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
 static const AVOption options[]={
-    {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), FF_OPT_TYPE_INT, 0, 0, 1, VE},
+    {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, VE},
 {NULL}
 };
 static const AVClass class = { "dnxhd", av_default_item_name, options, LIBAVUTIL_VERSION_INT };
diff --git a/libavcodec/libvorbis.c b/libavcodec/libvorbis.c
index 0a52daf3f1..88da705a32 100644
--- a/libavcodec/libvorbis.c
+++ b/libavcodec/libvorbis.c
@@ -55,7 +55,7 @@ typedef struct OggVorbisContext {
 } OggVorbisContext ;
 
 static const AVOption options[]={
-{"iblock", "Sets the impulse block bias", offsetof(OggVorbisContext, iblock), FF_OPT_TYPE_DOUBLE, 0, -15, 0, AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_ENCODING_PARAM},
+{"iblock", "Sets the impulse block bias", offsetof(OggVorbisContext, iblock), FF_OPT_TYPE_DOUBLE, {.dbl = 0}, -15, 0, AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_ENCODING_PARAM},
 {NULL}
 };
 static const AVClass class = { "libvorbis", av_default_item_name, options, LIBAVUTIL_VERSION_INT };
diff --git a/libavcodec/options.c b/libavcodec/options.c
index 03b87a3572..b22e53db9d 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -49,398 +49,398 @@ static const char* context_to_name(void* ptr) {
 #define AV_CODEC_DEFAULT_BITRATE 200*1000
 
 static const AVOption options[]={
-{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE, INT_MIN, INT_MAX, V|E},
-{"ab", "set bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, 64*1000, INT_MIN, INT_MAX, A|E},
-{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, 1, INT_MAX, V|E},
-{"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, 0, UINT_MAX, V|A|E|D, "flags"},
-{"mv4", "use four motion vector by macroblock (mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_4MV, INT_MIN, INT_MAX, V|E, "flags"},
-{"obmc", "use overlapped block motion compensation (h263+)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_OBMC, INT_MIN, INT_MAX, V|E, "flags"},
-{"qpel", "use 1/4 pel motion compensation", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_QPEL, INT_MIN, INT_MAX, V|E, "flags"},
-{"loop", "use loop filter", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_LOOP_FILTER, INT_MIN, INT_MAX, V|E, "flags"},
-{"qscale", "use fixed qscale", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_QSCALE, INT_MIN, INT_MAX, 0, "flags"},
-{"gmc", "use gmc", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_GMC, INT_MIN, INT_MAX, V|E, "flags"},
-{"mv0", "always try a mb with mv=<0,0>", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_MV0, INT_MIN, INT_MAX, V|E, "flags"},
-{"part", "use data partitioning", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PART, INT_MIN, INT_MAX, V|E, "flags"},
-{"input_preserved", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG_INPUT_PRESERVED, INT_MIN, INT_MAX, 0, "flags"},
-{"pass1", "use internal 2pass ratecontrol in first  pass mode", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PASS1, INT_MIN, INT_MAX, 0, "flags"},
-{"pass2", "use internal 2pass ratecontrol in second pass mode", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PASS2, INT_MIN, INT_MAX, 0, "flags"},
-{"extern_huff", "use external huffman table (for mjpeg)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_EXTERN_HUFF, INT_MIN, INT_MAX, 0, "flags"},
-{"gray", "only decode/encode grayscale", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_GRAY, INT_MIN, INT_MAX, V|E|D, "flags"},
-{"emu_edge", "don't draw edges", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_EMU_EDGE, INT_MIN, INT_MAX, 0, "flags"},
-{"psnr", "error[?] variables will be set during encoding", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PSNR, INT_MIN, INT_MAX, V|E, "flags"},
-{"truncated", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG_TRUNCATED, INT_MIN, INT_MAX, 0, "flags"},
-{"naq", "normalize adaptive quantization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_NORMALIZE_AQP, INT_MIN, INT_MAX, V|E, "flags"},
-{"ildct", "use interlaced dct", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_INTERLACED_DCT, INT_MIN, INT_MAX, V|E, "flags"},
-{"low_delay", "force low delay", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_LOW_DELAY, INT_MIN, INT_MAX, V|D|E, "flags"},
-{"alt", "enable alternate scantable (mpeg2/mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_ALT_SCAN, INT_MIN, INT_MAX, V|E, "flags"},
-{"global_header", "place global headers in extradata instead of every keyframe", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_GLOBAL_HEADER, INT_MIN, INT_MAX, V|A|E, "flags"},
-{"bitexact", "use only bitexact stuff (except (i)dct)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_BITEXACT, INT_MIN, INT_MAX, A|V|S|D|E, "flags"},
-{"aic", "h263 advanced intra coding / mpeg4 ac prediction", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_AC_PRED, INT_MIN, INT_MAX, V|E, "flags"},
-{"umv", "use unlimited motion vectors", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_H263P_UMV, INT_MIN, INT_MAX, V|E, "flags"},
-{"cbp", "use rate distortion optimization for cbp", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_CBP_RD, INT_MIN, INT_MAX, V|E, "flags"},
-{"qprd", "use rate distortion optimization for qp selection", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_QP_RD, INT_MIN, INT_MAX, V|E, "flags"},
-{"aiv", "h263 alternative inter vlc", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_H263P_AIV, INT_MIN, INT_MAX, V|E, "flags"},
-{"slice", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG_H263P_SLICE_STRUCT, INT_MIN, INT_MAX, V|E, "flags"},
-{"ilme", "interlaced motion estimation", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_INTERLACED_ME, INT_MIN, INT_MAX, V|E, "flags"},
-{"scan_offset", "will reserve space for svcd scan offset user data", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_SVCD_SCAN_OFFSET, INT_MIN, INT_MAX, V|E, "flags"},
-{"cgop", "closed gop", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_CLOSED_GOP, INT_MIN, INT_MAX, V|E, "flags"},
-{"fast", "allow non spec compliant speedup tricks", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FAST, INT_MIN, INT_MAX, V|E, "flags2"},
-{"sgop", "strictly enforce gop size", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_STRICT_GOP, INT_MIN, INT_MAX, V|E, "flags2"},
-{"noout", "skip bitstream encoding", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_NO_OUTPUT, INT_MIN, INT_MAX, V|E, "flags2"},
-{"local_header", "place global headers at every keyframe instead of in extradata", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_LOCAL_HEADER, INT_MIN, INT_MAX, V|E, "flags2"},
-{"sub_id", NULL, OFFSET(sub_id), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"me_method", "set motion estimation method", OFFSET(me_method), FF_OPT_TYPE_INT, ME_EPZS, INT_MIN, INT_MAX, V|E, "me_method"},
-{"zero", "zero motion estimation (fastest)", 0, FF_OPT_TYPE_CONST, ME_ZERO, INT_MIN, INT_MAX, V|E, "me_method" },
-{"full", "full motion estimation (slowest)", 0, FF_OPT_TYPE_CONST, ME_FULL, INT_MIN, INT_MAX, V|E, "me_method" },
-{"epzs", "EPZS motion estimation (default)", 0, FF_OPT_TYPE_CONST, ME_EPZS, INT_MIN, INT_MAX, V|E, "me_method" },
-{"esa", "esa motion estimation (alias for full)", 0, FF_OPT_TYPE_CONST, ME_FULL, INT_MIN, INT_MAX, V|E, "me_method" },
-{"tesa", "tesa motion estimation", 0, FF_OPT_TYPE_CONST, ME_TESA, INT_MIN, INT_MAX, V|E, "me_method" },
-{"dia", "dia motion estimation (alias for epzs)", 0, FF_OPT_TYPE_CONST, ME_EPZS, INT_MIN, INT_MAX, V|E, "me_method" },
-{"log", "log motion estimation", 0, FF_OPT_TYPE_CONST, ME_LOG, INT_MIN, INT_MAX, V|E, "me_method" },
-{"phods", "phods motion estimation", 0, FF_OPT_TYPE_CONST, ME_PHODS, INT_MIN, INT_MAX, V|E, "me_method" },
-{"x1", "X1 motion estimation", 0, FF_OPT_TYPE_CONST, ME_X1, INT_MIN, INT_MAX, V|E, "me_method" },
-{"hex", "hex motion estimation", 0, FF_OPT_TYPE_CONST, ME_HEX, INT_MIN, INT_MAX, V|E, "me_method" },
-{"umh", "umh motion estimation", 0, FF_OPT_TYPE_CONST, ME_UMH, INT_MIN, INT_MAX, V|E, "me_method" },
-{"iter", "iter motion estimation", 0, FF_OPT_TYPE_CONST, ME_ITER, INT_MIN, INT_MAX, V|E, "me_method" },
-{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX},
-{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E},
-{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
-{"frame_size", NULL, OFFSET(frame_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
-{"frame_number", NULL, OFFSET(frame_number), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"delay", NULL, OFFSET(delay), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"qcomp", "video quantizer scale compression (VBR)", OFFSET(qcompress), FF_OPT_TYPE_FLOAT, 0.5, -FLT_MAX, FLT_MAX, V|E},
-{"qblur", "video quantizer scale blur (VBR)", OFFSET(qblur), FF_OPT_TYPE_FLOAT, 0.5, 0, FLT_MAX, V|E},
-{"qmin", "min video quantizer scale (VBR)", OFFSET(qmin), FF_OPT_TYPE_INT, 2, 0, 69, V|E},
-{"qmax", "max video quantizer scale (VBR)", OFFSET(qmax), FF_OPT_TYPE_INT, 31, 0, 69, V|E},
-{"qdiff", "max difference between the quantizer scale (VBR)", OFFSET(max_qdiff), FF_OPT_TYPE_INT, 3, INT_MIN, INT_MAX, V|E},
-{"bf", "use 'frames' B frames", OFFSET(max_b_frames), FF_OPT_TYPE_INT, DEFAULT, 0, FF_MAX_B_FRAMES, V|E},
-{"b_qfactor", "qp factor between p and b frames", OFFSET(b_quant_factor), FF_OPT_TYPE_FLOAT, 1.25, -FLT_MAX, FLT_MAX, V|E},
-{"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
-{"wpredp", "weighted prediction analysis method", OFFSET(weighted_p_pred), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
-{"ps", "rtp payload size in bytes", OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"mv_bits", NULL, OFFSET(mv_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"header_bits", NULL, OFFSET(header_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"i_tex_bits", NULL, OFFSET(i_tex_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"p_tex_bits", NULL, OFFSET(p_tex_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"i_count", NULL, OFFSET(i_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"p_count", NULL, OFFSET(p_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"skip_count", NULL, OFFSET(skip_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"misc_bits", NULL, OFFSET(misc_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"frame_bits", NULL, OFFSET(frame_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"codec_tag", NULL, OFFSET(codec_tag), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"bug", "workaround not auto detected encoder bugs", OFFSET(workaround_bugs), FF_OPT_TYPE_FLAGS, FF_BUG_AUTODETECT, INT_MIN, INT_MAX, V|D, "bug"},
-{"autodetect", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_AUTODETECT, INT_MIN, INT_MAX, V|D, "bug"},
-{"old_msmpeg4", "some old lavc generated msmpeg4v3 files (no autodetection)", 0, FF_OPT_TYPE_CONST, FF_BUG_OLD_MSMPEG4, INT_MIN, INT_MAX, V|D, "bug"},
-{"xvid_ilace", "Xvid interlacing bug (autodetected if fourcc==XVIX)", 0, FF_OPT_TYPE_CONST, FF_BUG_XVID_ILACE, INT_MIN, INT_MAX, V|D, "bug"},
-{"ump4", "(autodetected if fourcc==UMP4)", 0, FF_OPT_TYPE_CONST, FF_BUG_UMP4, INT_MIN, INT_MAX, V|D, "bug"},
-{"no_padding", "padding bug (autodetected)", 0, FF_OPT_TYPE_CONST, FF_BUG_NO_PADDING, INT_MIN, INT_MAX, V|D, "bug"},
-{"amv", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_AMV, INT_MIN, INT_MAX, V|D, "bug"},
-{"ac_vlc", "illegal vlc bug (autodetected per fourcc)", 0, FF_OPT_TYPE_CONST, FF_BUG_AC_VLC, INT_MIN, INT_MAX, V|D, "bug"},
-{"qpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_QPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"},
-{"std_qpel", "old standard qpel (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, FF_BUG_STD_QPEL, INT_MIN, INT_MAX, V|D, "bug"},
-{"qpel_chroma2", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_QPEL_CHROMA2, INT_MIN, INT_MAX, V|D, "bug"},
-{"direct_blocksize", "direct-qpel-blocksize bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, FF_BUG_DIRECT_BLOCKSIZE, INT_MIN, INT_MAX, V|D, "bug"},
-{"edge", "edge padding bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, FF_BUG_EDGE, INT_MIN, INT_MAX, V|D, "bug"},
-{"hpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_HPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"},
-{"dc_clip", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_DC_CLIP, INT_MIN, INT_MAX, V|D, "bug"},
-{"ms", "workaround various bugs in microsofts broken decoders", 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"},
-{"trunc", "trancated frames", 0, FF_OPT_TYPE_CONST,FF_BUG_TRUNCATED, INT_MIN, INT_MAX, V|D, "bug"},
-{"lelim", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)", OFFSET(luma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"celim", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)", OFFSET(chroma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|V|D|E, "strict"},
-{"very", "strictly conform to a older more strict version of the spec or reference software", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|D|E, "strict"},
-{"strict", "strictly conform to all the things in the spec no matter what consequences", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|D|E, "strict"},
-{"normal", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_NORMAL, INT_MIN, INT_MAX, V|D|E, "strict"},
-{"unofficial", "allow unofficial extensions", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_UNOFFICIAL, INT_MIN, INT_MAX, V|D|E, "strict"},
-{"experimental", "allow non standardized experimental things", 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|D|E, "strict"},
-{"b_qoffset", "qp offset between P and B frames", OFFSET(b_quant_offset), FF_OPT_TYPE_FLOAT, 1.25, -FLT_MAX, FLT_MAX, V|E},
-{"er", "set error detection aggressivity", OFFSET(error_recognition), FF_OPT_TYPE_INT, FF_ER_CAREFUL, INT_MIN, INT_MAX, A|V|D, "er"},
-{"careful", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"},
-{"compliant", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_COMPLIANT, INT_MIN, INT_MAX, V|D, "er"},
-{"aggressive", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_AGGRESSIVE, INT_MIN, INT_MAX, V|D, "er"},
-{"very_aggressive", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_VERY_AGGRESSIVE, INT_MIN, INT_MAX, V|D, "er"},
-{"has_b_frames", NULL, OFFSET(has_b_frames), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"block_align", NULL, OFFSET(block_align), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"parse_only", NULL, OFFSET(parse_only), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"mpeg_quant", "use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"stats_out", NULL, OFFSET(stats_out), FF_OPT_TYPE_STRING, DEFAULT, CHAR_MIN, CHAR_MAX},
-{"stats_in", NULL, OFFSET(stats_in), FF_OPT_TYPE_STRING, DEFAULT, CHAR_MIN, CHAR_MAX},
-{"qsquish", "how to keep quantizer between qmin and qmax (0 = clip, 1 = use differentiable function)", OFFSET(rc_qsquish), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 99, V|E},
-{"rc_qmod_amp", "experimental quantizer modulation", OFFSET(rc_qmod_amp), FF_OPT_TYPE_FLOAT, DEFAULT, -FLT_MAX, FLT_MAX, V|E},
-{"rc_qmod_freq", "experimental quantizer modulation", OFFSET(rc_qmod_freq), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"rc_override_count", NULL, OFFSET(rc_override_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"rc_eq", "set rate control equation", OFFSET(rc_eq), FF_OPT_TYPE_STRING, DEFAULT, CHAR_MIN, CHAR_MAX, V|E},
-{"maxrate", "set max video bitrate tolerance (in bits/s)", OFFSET(rc_max_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"minrate", "set min video bitrate tolerance (in bits/s)", OFFSET(rc_min_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|V|E},
-{"rc_buf_aggressivity", "currently useless", OFFSET(rc_buffer_aggressivity), FF_OPT_TYPE_FLOAT, 1.0, -FLT_MAX, FLT_MAX, V|E},
-{"i_qfactor", "qp factor between P and I frames", OFFSET(i_quant_factor), FF_OPT_TYPE_FLOAT, -0.8, -FLT_MAX, FLT_MAX, V|E},
-{"i_qoffset", "qp offset between P and I frames", OFFSET(i_quant_offset), FF_OPT_TYPE_FLOAT, 0.0, -FLT_MAX, FLT_MAX, V|E},
-{"rc_init_cplx", "initial complexity for 1-pass encoding", OFFSET(rc_initial_cplx), FF_OPT_TYPE_FLOAT, DEFAULT, -FLT_MAX, FLT_MAX, V|E},
-{"dct", "DCT algorithm", OFFSET(dct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E, "dct"},
-{"auto", "autoselect a good one (default)", 0, FF_OPT_TYPE_CONST, FF_DCT_AUTO, INT_MIN, INT_MAX, V|E, "dct"},
-{"fastint", "fast integer", 0, FF_OPT_TYPE_CONST, FF_DCT_FASTINT, INT_MIN, INT_MAX, V|E, "dct"},
-{"int", "accurate integer", 0, FF_OPT_TYPE_CONST, FF_DCT_INT, INT_MIN, INT_MAX, V|E, "dct"},
-{"mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MMX, INT_MIN, INT_MAX, V|E, "dct"},
-{"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MLIB, INT_MIN, INT_MAX, V|E, "dct"},
-{"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_ALTIVEC, INT_MIN, INT_MAX, V|E, "dct"},
-{"faan", "floating point AAN DCT", 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"},
-{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
-{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
-{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
-{"p_mask", "inter masking", OFFSET(p_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
-{"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
-{"idct", "select IDCT implementation", OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"},
-{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_AUTO, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_INT, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"simple", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLE, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"simplemmx", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEMMX, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"libmpeg2mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_LIBMPEG2MMX, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"ps2", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_PS2, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_MLIB, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"arm", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_ARM, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_ALTIVEC, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"sh4", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SH4, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"simplearm", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARM, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"simplearmv5te", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV5TE, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"simplearmv6", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV6, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"simpleneon", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLENEON, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"simplealpha", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEALPHA, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"h264", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_H264, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"vp3", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_VP3, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"ipp", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_IPP, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"xvidmmx", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_XVIDMMX, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"faani", "floating point AAN IDCT", 0, FF_OPT_TYPE_CONST, FF_IDCT_FAAN, INT_MIN, INT_MAX, V|D|E, "idct"},
-{"slice_count", NULL, OFFSET(slice_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"ec", "set error concealment strategy", OFFSET(error_concealment), FF_OPT_TYPE_FLAGS, 3, INT_MIN, INT_MAX, V|D, "ec"},
-{"guess_mvs", "iterative motion vector (MV) search (slow)", 0, FF_OPT_TYPE_CONST, FF_EC_GUESS_MVS, INT_MIN, INT_MAX, V|D, "ec"},
-{"deblock", "use strong deblock filter for damaged MBs", 0, FF_OPT_TYPE_CONST, FF_EC_DEBLOCK, INT_MIN, INT_MAX, V|D, "ec"},
-{"bits_per_coded_sample", NULL, OFFSET(bits_per_coded_sample), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"pred", "prediction method", OFFSET(prediction_method), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "pred"},
-{"left", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_LEFT, INT_MIN, INT_MAX, V|E, "pred"},
-{"plane", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_PLANE, INT_MIN, INT_MAX, V|E, "pred"},
-{"median", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_MEDIAN, INT_MIN, INT_MAX, V|E, "pred"},
-{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E},
-{"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, DEFAULT, 0, INT_MAX, V|A|S|E|D, "debug"},
-{"pict", "picture info", 0, FF_OPT_TYPE_CONST, FF_DEBUG_PICT_INFO, INT_MIN, INT_MAX, V|D, "debug"},
-{"rc", "rate control", 0, FF_OPT_TYPE_CONST, FF_DEBUG_RC, INT_MIN, INT_MAX, V|E, "debug"},
-{"bitstream", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_BITSTREAM, INT_MIN, INT_MAX, V|D, "debug"},
-{"mb_type", "macroblock (MB) type", 0, FF_OPT_TYPE_CONST, FF_DEBUG_MB_TYPE, INT_MIN, INT_MAX, V|D, "debug"},
-{"qp", "per-block quantization parameter (QP)", 0, FF_OPT_TYPE_CONST, FF_DEBUG_QP, INT_MIN, INT_MAX, V|D, "debug"},
-{"mv", "motion vector", 0, FF_OPT_TYPE_CONST, FF_DEBUG_MV, INT_MIN, INT_MAX, V|D, "debug"},
-{"dct_coeff", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_DCT_COEFF, INT_MIN, INT_MAX, V|D, "debug"},
-{"skip", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_SKIP, INT_MIN, INT_MAX, V|D, "debug"},
-{"startcode", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_STARTCODE, INT_MIN, INT_MAX, V|D, "debug"},
-{"pts", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_PTS, INT_MIN, INT_MAX, V|D, "debug"},
-{"er", "error recognition", 0, FF_OPT_TYPE_CONST, FF_DEBUG_ER, INT_MIN, INT_MAX, V|D, "debug"},
-{"mmco", "memory management control operations (H.264)", 0, FF_OPT_TYPE_CONST, FF_DEBUG_MMCO, INT_MIN, INT_MAX, V|D, "debug"},
-{"bugs", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_BUGS, INT_MIN, INT_MAX, V|D, "debug"},
-{"vis_qp", "visualize quantization parameter (QP), lower QP are tinted greener", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_QP, INT_MIN, INT_MAX, V|D, "debug"},
-{"vis_mb_type", "visualize block types", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MB_TYPE, INT_MIN, INT_MAX, V|D, "debug"},
-{"buffers", "picture buffer allocations", 0, FF_OPT_TYPE_CONST, FF_DEBUG_BUFFERS, INT_MIN, INT_MAX, V|D, "debug"},
-{"thread_ops", "threading operations", 0, FF_OPT_TYPE_CONST, FF_DEBUG_THREADS, INT_MIN, INT_MAX, V|D, "debug"},
-{"vismv", "visualize motion vectors (MVs)", OFFSET(debug_mv), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|D, "debug_mv"},
-{"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
-{"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
-{"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"},
-{"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"ildctcmp", "interlaced dct compare function", OFFSET(ildct_cmp), FF_OPT_TYPE_INT, FF_CMP_VSAD, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"dia_size", "diamond type & size for motion estimation", OFFSET(dia_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"last_pred", "amount of motion predictors from the previous frame", OFFSET(last_predictor_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"preme", "pre motion estimation", OFFSET(pre_me), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"sad", "sum of absolute differences, fast (default)", 0, FF_OPT_TYPE_CONST, FF_CMP_SAD, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"sse", "sum of squared errors", 0, FF_OPT_TYPE_CONST, FF_CMP_SSE, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"satd", "sum of absolute Hadamard transformed differences", 0, FF_OPT_TYPE_CONST, FF_CMP_SATD, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"dct", "sum of absolute DCT transformed differences", 0, FF_OPT_TYPE_CONST, FF_CMP_DCT, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"psnr", "sum of squared quantization errors (avoid, low quality)", 0, FF_OPT_TYPE_CONST, FF_CMP_PSNR, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"bit", "number of bits needed for the block", 0, FF_OPT_TYPE_CONST, FF_CMP_BIT, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"rd", "rate distortion optimal, slow", 0, FF_OPT_TYPE_CONST, FF_CMP_RD, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"zero", "0", 0, FF_OPT_TYPE_CONST, FF_CMP_ZERO, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"vsad", "sum of absolute vertical differences", 0, FF_OPT_TYPE_CONST, FF_CMP_VSAD, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"vsse", "sum of squared vertical differences", 0, FF_OPT_TYPE_CONST, FF_CMP_VSSE, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"nsse", "noise preserving sum of squared differences", 0, FF_OPT_TYPE_CONST, FF_CMP_NSSE, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, {.dbl = AV_CODEC_DEFAULT_BITRATE }, INT_MIN, INT_MAX, V|E},
+{"ab", "set bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, {.dbl = 64*1000 }, INT_MIN, INT_MAX, A|E},
+{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, {.dbl = AV_CODEC_DEFAULT_BITRATE*20 }, 1, INT_MAX, V|E},
+{"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, UINT_MAX, V|A|E|D, "flags"},
+{"mv4", "use four motion vector by macroblock (mpeg4)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_4MV }, INT_MIN, INT_MAX, V|E, "flags"},
+{"obmc", "use overlapped block motion compensation (h263+)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_OBMC }, INT_MIN, INT_MAX, V|E, "flags"},
+{"qpel", "use 1/4 pel motion compensation", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_QPEL }, INT_MIN, INT_MAX, V|E, "flags"},
+{"loop", "use loop filter", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_LOOP_FILTER }, INT_MIN, INT_MAX, V|E, "flags"},
+{"qscale", "use fixed qscale", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_QSCALE }, INT_MIN, INT_MAX, 0, "flags"},
+{"gmc", "use gmc", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_GMC }, INT_MIN, INT_MAX, V|E, "flags"},
+{"mv0", "always try a mb with mv=<0,0>", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_MV0 }, INT_MIN, INT_MAX, V|E, "flags"},
+{"part", "use data partitioning", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_PART }, INT_MIN, INT_MAX, V|E, "flags"},
+{"input_preserved", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_INPUT_PRESERVED }, INT_MIN, INT_MAX, 0, "flags"},
+{"pass1", "use internal 2pass ratecontrol in first  pass mode", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_PASS1 }, INT_MIN, INT_MAX, 0, "flags"},
+{"pass2", "use internal 2pass ratecontrol in second pass mode", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_PASS2 }, INT_MIN, INT_MAX, 0, "flags"},
+{"extern_huff", "use external huffman table (for mjpeg)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_EXTERN_HUFF }, INT_MIN, INT_MAX, 0, "flags"},
+{"gray", "only decode/encode grayscale", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_GRAY }, INT_MIN, INT_MAX, V|E|D, "flags"},
+{"emu_edge", "don't draw edges", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_EMU_EDGE }, INT_MIN, INT_MAX, 0, "flags"},
+{"psnr", "error[?] variables will be set during encoding", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_PSNR }, INT_MIN, INT_MAX, V|E, "flags"},
+{"truncated", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_TRUNCATED }, INT_MIN, INT_MAX, 0, "flags"},
+{"naq", "normalize adaptive quantization", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_NORMALIZE_AQP }, INT_MIN, INT_MAX, V|E, "flags"},
+{"ildct", "use interlaced dct", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_INTERLACED_DCT }, INT_MIN, INT_MAX, V|E, "flags"},
+{"low_delay", "force low delay", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_LOW_DELAY }, INT_MIN, INT_MAX, V|D|E, "flags"},
+{"alt", "enable alternate scantable (mpeg2/mpeg4)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_ALT_SCAN }, INT_MIN, INT_MAX, V|E, "flags"},
+{"global_header", "place global headers in extradata instead of every keyframe", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_GLOBAL_HEADER }, INT_MIN, INT_MAX, V|A|E, "flags"},
+{"bitexact", "use only bitexact stuff (except (i)dct)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_BITEXACT }, INT_MIN, INT_MAX, A|V|S|D|E, "flags"},
+{"aic", "h263 advanced intra coding / mpeg4 ac prediction", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_AC_PRED }, INT_MIN, INT_MAX, V|E, "flags"},
+{"umv", "use unlimited motion vectors", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_H263P_UMV }, INT_MIN, INT_MAX, V|E, "flags"},
+{"cbp", "use rate distortion optimization for cbp", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_CBP_RD }, INT_MIN, INT_MAX, V|E, "flags"},
+{"qprd", "use rate distortion optimization for qp selection", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_QP_RD }, INT_MIN, INT_MAX, V|E, "flags"},
+{"aiv", "h263 alternative inter vlc", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_H263P_AIV }, INT_MIN, INT_MAX, V|E, "flags"},
+{"slice", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_H263P_SLICE_STRUCT }, INT_MIN, INT_MAX, V|E, "flags"},
+{"ilme", "interlaced motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_INTERLACED_ME }, INT_MIN, INT_MAX, V|E, "flags"},
+{"scan_offset", "will reserve space for svcd scan offset user data", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_SVCD_SCAN_OFFSET }, INT_MIN, INT_MAX, V|E, "flags"},
+{"cgop", "closed gop", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG_CLOSED_GOP }, INT_MIN, INT_MAX, V|E, "flags"},
+{"fast", "allow non spec compliant speedup tricks", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_FAST }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"sgop", "strictly enforce gop size", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_STRICT_GOP }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"noout", "skip bitstream encoding", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"local_header", "place global headers at every keyframe instead of in extradata", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"sub_id", NULL, OFFSET(sub_id), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"me_method", "set motion estimation method", OFFSET(me_method), FF_OPT_TYPE_INT, {.dbl = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"},
+{"zero", "zero motion estimation (fastest)", 0, FF_OPT_TYPE_CONST, {.dbl = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"full", "full motion estimation (slowest)", 0, FF_OPT_TYPE_CONST, {.dbl = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"epzs", "EPZS motion estimation (default)", 0, FF_OPT_TYPE_CONST, {.dbl = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"esa", "esa motion estimation (alias for full)", 0, FF_OPT_TYPE_CONST, {.dbl = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"tesa", "tesa motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_TESA }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"dia", "dia motion estimation (alias for epzs)", 0, FF_OPT_TYPE_CONST, {.dbl = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"log", "log motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_LOG }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"phods", "phods motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_PHODS }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"x1", "X1 motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_X1 }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"hex", "hex motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_HEX }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"umh", "umh motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_UMH }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"iter", "iter motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_ITER }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, {.dbl = 0}, INT_MIN, INT_MAX},
+{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, {.dbl = 12 }, INT_MIN, INT_MAX, V|E},
+{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, A|E},
+{"frame_size", NULL, OFFSET(frame_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, A|E},
+{"frame_number", NULL, OFFSET(frame_number), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"delay", NULL, OFFSET(delay), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"qcomp", "video quantizer scale compression (VBR)", OFFSET(qcompress), FF_OPT_TYPE_FLOAT, {.dbl = 0.5 }, -FLT_MAX, FLT_MAX, V|E},
+{"qblur", "video quantizer scale blur (VBR)", OFFSET(qblur), FF_OPT_TYPE_FLOAT, {.dbl = 0.5 }, 0, FLT_MAX, V|E},
+{"qmin", "min video quantizer scale (VBR)", OFFSET(qmin), FF_OPT_TYPE_INT, {.dbl = 2 }, 0, 69, V|E},
+{"qmax", "max video quantizer scale (VBR)", OFFSET(qmax), FF_OPT_TYPE_INT, {.dbl = 31 }, 0, 69, V|E},
+{"qdiff", "max difference between the quantizer scale (VBR)", OFFSET(max_qdiff), FF_OPT_TYPE_INT, {.dbl = 3 }, INT_MIN, INT_MAX, V|E},
+{"bf", "use 'frames' B frames", OFFSET(max_b_frames), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, FF_MAX_B_FRAMES, V|E},
+{"b_qfactor", "qp factor between p and b frames", OFFSET(b_quant_factor), FF_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
+{"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, {.dbl = 0 }, INT_MIN, INT_MAX, V|E},
+{"wpredp", "weighted prediction analysis method", OFFSET(weighted_p_pred), FF_OPT_TYPE_INT, {.dbl = 0 }, INT_MIN, INT_MAX, V|E},
+{"ps", "rtp payload size in bytes", OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"mv_bits", NULL, OFFSET(mv_bits), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"header_bits", NULL, OFFSET(header_bits), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"i_tex_bits", NULL, OFFSET(i_tex_bits), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"p_tex_bits", NULL, OFFSET(p_tex_bits), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"i_count", NULL, OFFSET(i_count), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"p_count", NULL, OFFSET(p_count), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"skip_count", NULL, OFFSET(skip_count), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"misc_bits", NULL, OFFSET(misc_bits), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"frame_bits", NULL, OFFSET(frame_bits), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"codec_tag", NULL, OFFSET(codec_tag), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"bug", "workaround not auto detected encoder bugs", OFFSET(workaround_bugs), FF_OPT_TYPE_FLAGS, {.dbl = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
+{"autodetect", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
+{"old_msmpeg4", "some old lavc generated msmpeg4v3 files (no autodetection)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_OLD_MSMPEG4 }, INT_MIN, INT_MAX, V|D, "bug"},
+{"xvid_ilace", "Xvid interlacing bug (autodetected if fourcc==XVIX)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_XVID_ILACE }, INT_MIN, INT_MAX, V|D, "bug"},
+{"ump4", "(autodetected if fourcc==UMP4)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_UMP4 }, INT_MIN, INT_MAX, V|D, "bug"},
+{"no_padding", "padding bug (autodetected)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_NO_PADDING }, INT_MIN, INT_MAX, V|D, "bug"},
+{"amv", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_AMV }, INT_MIN, INT_MAX, V|D, "bug"},
+{"ac_vlc", "illegal vlc bug (autodetected per fourcc)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_AC_VLC }, INT_MIN, INT_MAX, V|D, "bug"},
+{"qpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_QPEL_CHROMA }, INT_MIN, INT_MAX, V|D, "bug"},
+{"std_qpel", "old standard qpel (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_STD_QPEL }, INT_MIN, INT_MAX, V|D, "bug"},
+{"qpel_chroma2", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_QPEL_CHROMA2 }, INT_MIN, INT_MAX, V|D, "bug"},
+{"direct_blocksize", "direct-qpel-blocksize bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_DIRECT_BLOCKSIZE }, INT_MIN, INT_MAX, V|D, "bug"},
+{"edge", "edge padding bug (autodetected per fourcc/version)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_EDGE }, INT_MIN, INT_MAX, V|D, "bug"},
+{"hpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_HPEL_CHROMA }, INT_MIN, INT_MAX, V|D, "bug"},
+{"dc_clip", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_DC_CLIP }, INT_MIN, INT_MAX, V|D, "bug"},
+{"ms", "workaround various bugs in microsofts broken decoders", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_MS }, INT_MIN, INT_MAX, V|D, "bug"},
+{"trunc", "trancated frames", 0, FF_OPT_TYPE_CONST, {.dbl = FF_BUG_TRUNCATED}, INT_MIN, INT_MAX, V|D, "bug"},
+{"lelim", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)", OFFSET(luma_elim_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"celim", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)", OFFSET(chroma_elim_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, A|V|D|E, "strict"},
+{"very", "strictly conform to a older more strict version of the spec or reference software", 0, FF_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_VERY_STRICT }, INT_MIN, INT_MAX, V|D|E, "strict"},
+{"strict", "strictly conform to all the things in the spec no matter what consequences", 0, FF_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_STRICT }, INT_MIN, INT_MAX, V|D|E, "strict"},
+{"normal", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_NORMAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
+{"unofficial", "allow unofficial extensions", 0, FF_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_UNOFFICIAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
+{"experimental", "allow non standardized experimental things", 0, FF_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_EXPERIMENTAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
+{"b_qoffset", "qp offset between P and B frames", OFFSET(b_quant_offset), FF_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
+{"er", "set error detection aggressivity", OFFSET(error_recognition), FF_OPT_TYPE_INT, {.dbl = FF_ER_CAREFUL }, INT_MIN, INT_MAX, A|V|D, "er"},
+{"careful", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_ER_CAREFUL }, INT_MIN, INT_MAX, V|D, "er"},
+{"compliant", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_ER_COMPLIANT }, INT_MIN, INT_MAX, V|D, "er"},
+{"aggressive", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_ER_AGGRESSIVE }, INT_MIN, INT_MAX, V|D, "er"},
+{"very_aggressive", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_ER_VERY_AGGRESSIVE }, INT_MIN, INT_MAX, V|D, "er"},
+{"has_b_frames", NULL, OFFSET(has_b_frames), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"block_align", NULL, OFFSET(block_align), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"parse_only", NULL, OFFSET(parse_only), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"mpeg_quant", "use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"stats_out", NULL, OFFSET(stats_out), FF_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX},
+{"stats_in", NULL, OFFSET(stats_in), FF_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX},
+{"qsquish", "how to keep quantizer between qmin and qmax (0 = clip, 1 = use differentiable function)", OFFSET(rc_qsquish), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 99, V|E},
+{"rc_qmod_amp", "experimental quantizer modulation", OFFSET(rc_qmod_amp), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
+{"rc_qmod_freq", "experimental quantizer modulation", OFFSET(rc_qmod_freq), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"rc_override_count", NULL, OFFSET(rc_override_count), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"rc_eq", "set rate control equation", OFFSET(rc_eq), FF_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, V|E},
+{"maxrate", "set max video bitrate tolerance (in bits/s)", OFFSET(rc_max_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"minrate", "set min video bitrate tolerance (in bits/s)", OFFSET(rc_min_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, A|V|E},
+{"rc_buf_aggressivity", "currently useless", OFFSET(rc_buffer_aggressivity), FF_OPT_TYPE_FLOAT, {.dbl = 1.0 }, -FLT_MAX, FLT_MAX, V|E},
+{"i_qfactor", "qp factor between P and I frames", OFFSET(i_quant_factor), FF_OPT_TYPE_FLOAT, {.dbl = -0.8 }, -FLT_MAX, FLT_MAX, V|E},
+{"i_qoffset", "qp offset between P and I frames", OFFSET(i_quant_offset), FF_OPT_TYPE_FLOAT, {.dbl = 0.0 }, -FLT_MAX, FLT_MAX, V|E},
+{"rc_init_cplx", "initial complexity for 1-pass encoding", OFFSET(rc_initial_cplx), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
+{"dct", "DCT algorithm", OFFSET(dct_algo), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, V|E, "dct"},
+{"auto", "autoselect a good one (default)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
+{"fastint", "fast integer", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"int", "accurate integer", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"mmx", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
+{"mlib", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DCT_MLIB }, INT_MIN, INT_MAX, V|E, "dct"},
+{"altivec", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"},
+{"faan", "floating point AAN DCT", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
+{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), FF_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), FF_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), FF_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"p_mask", "inter masking", OFFSET(p_masking), FF_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), FF_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
+{"idct", "select IDCT implementation", OFFSET(idct_algo), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, V|E|D, "idct"},
+{"auto", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"int", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_INT }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simple", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplemmx", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"libmpeg2mmx", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_LIBMPEG2MMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"ps2", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_PS2 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"mlib", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_MLIB }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"arm", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"altivec", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"sh4", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SH4 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearm", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv5te", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv6", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simpleneon", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplealpha", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"h264", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_H264 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"vp3", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_VP3 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"ipp", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"xvidmmx", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_XVIDMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"faani", "floating point AAN IDCT", 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
+{"slice_count", NULL, OFFSET(slice_count), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"ec", "set error concealment strategy", OFFSET(error_concealment), FF_OPT_TYPE_FLAGS, {.dbl = 3 }, INT_MIN, INT_MAX, V|D, "ec"},
+{"guess_mvs", "iterative motion vector (MV) search (slow)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_EC_GUESS_MVS }, INT_MIN, INT_MAX, V|D, "ec"},
+{"deblock", "use strong deblock filter for damaged MBs", 0, FF_OPT_TYPE_CONST, {.dbl = FF_EC_DEBLOCK }, INT_MIN, INT_MAX, V|D, "ec"},
+{"bits_per_coded_sample", NULL, OFFSET(bits_per_coded_sample), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"pred", "prediction method", OFFSET(prediction_method), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "pred"},
+{"left", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_LEFT }, INT_MIN, INT_MAX, V|E, "pred"},
+{"plane", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_PLANE }, INT_MIN, INT_MAX, V|E, "pred"},
+{"median", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_MEDIAN }, INT_MIN, INT_MAX, V|E, "pred"},
+{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E},
+{"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, INT_MAX, V|A|S|E|D, "debug"},
+{"pict", "picture info", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_PICT_INFO }, INT_MIN, INT_MAX, V|D, "debug"},
+{"rc", "rate control", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_RC }, INT_MIN, INT_MAX, V|E, "debug"},
+{"bitstream", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_BITSTREAM }, INT_MIN, INT_MAX, V|D, "debug"},
+{"mb_type", "macroblock (MB) type", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_MB_TYPE }, INT_MIN, INT_MAX, V|D, "debug"},
+{"qp", "per-block quantization parameter (QP)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_QP }, INT_MIN, INT_MAX, V|D, "debug"},
+{"mv", "motion vector", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_MV }, INT_MIN, INT_MAX, V|D, "debug"},
+{"dct_coeff", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_DCT_COEFF }, INT_MIN, INT_MAX, V|D, "debug"},
+{"skip", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_SKIP }, INT_MIN, INT_MAX, V|D, "debug"},
+{"startcode", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_STARTCODE }, INT_MIN, INT_MAX, V|D, "debug"},
+{"pts", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_PTS }, INT_MIN, INT_MAX, V|D, "debug"},
+{"er", "error recognition", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_ER }, INT_MIN, INT_MAX, V|D, "debug"},
+{"mmco", "memory management control operations (H.264)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_MMCO }, INT_MIN, INT_MAX, V|D, "debug"},
+{"bugs", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_BUGS }, INT_MIN, INT_MAX, V|D, "debug"},
+{"vis_qp", "visualize quantization parameter (QP), lower QP are tinted greener", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_VIS_QP }, INT_MIN, INT_MAX, V|D, "debug"},
+{"vis_mb_type", "visualize block types", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_VIS_MB_TYPE }, INT_MIN, INT_MAX, V|D, "debug"},
+{"buffers", "picture buffer allocations", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_BUFFERS }, INT_MIN, INT_MAX, V|D, "debug"},
+{"thread_ops", "threading operations", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_THREADS }, INT_MIN, INT_MAX, V|D, "debug"},
+{"vismv", "visualize motion vectors (MVs)", OFFSET(debug_mv), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, V|D, "debug_mv"},
+{"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_VIS_MV_P_FOR }, INT_MIN, INT_MAX, V|D, "debug_mv"},
+{"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_VIS_MV_B_FOR }, INT_MIN, INT_MAX, V|D, "debug_mv"},
+{"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_VIS_MV_B_BACK }, INT_MIN, INT_MAX, V|D, "debug_mv"},
+{"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"ildctcmp", "interlaced dct compare function", OFFSET(ildct_cmp), FF_OPT_TYPE_INT, {.dbl = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"dia_size", "diamond type & size for motion estimation", OFFSET(dia_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"last_pred", "amount of motion predictors from the previous frame", OFFSET(last_predictor_count), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"preme", "pre motion estimation", OFFSET(pre_me), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sad", "sum of absolute differences, fast (default)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sse", "sum of squared errors", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_SSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"satd", "sum of absolute Hadamard transformed differences", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_SATD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"dct", "sum of absolute DCT transformed differences", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_DCT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"psnr", "sum of squared quantization errors (avoid, low quality)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_PSNR }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"bit", "number of bits needed for the block", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_BIT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"rd", "rate distortion optimal, slow", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_RD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"zero", "0", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_ZERO }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsad", "sum of absolute vertical differences", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsse", "sum of squared vertical differences", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_VSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"nsse", "noise preserving sum of squared differences", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_NSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 #if CONFIG_SNOW_ENCODER
-{"w53", "5/3 wavelet, only used in snow", 0, FF_OPT_TYPE_CONST, FF_CMP_W53, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"w97", "9/7 wavelet, only used in snow", 0, FF_OPT_TYPE_CONST, FF_CMP_W97, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"w53", "5/3 wavelet, only used in snow", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_W53 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"w97", "9/7 wavelet, only used in snow", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_W97 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 #endif
-{"dctmax", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_DCTMAX, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_CHROMA, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"subq", "sub pel motion estimation quality", OFFSET(me_subpel_quality), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E},
-{"dtg_active_format", NULL, OFFSET(dtg_active_format), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"ibias", "intra quant bias", OFFSET(intra_quant_bias), FF_OPT_TYPE_INT, FF_DEFAULT_QUANT_BIAS, INT_MIN, INT_MAX, V|E},
-{"pbias", "inter quant bias", OFFSET(inter_quant_bias), FF_OPT_TYPE_INT, FF_DEFAULT_QUANT_BIAS, INT_MIN, INT_MAX, V|E},
-{"color_table_id", NULL, OFFSET(color_table_id), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"internal_buffer_count", NULL, OFFSET(internal_buffer_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"global_quality", NULL, OFFSET(global_quality), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"coder", NULL, OFFSET(coder_type), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "coder"},
-{"vlc", "variable length coder / huffman coder", 0, FF_OPT_TYPE_CONST, FF_CODER_TYPE_VLC, INT_MIN, INT_MAX, V|E, "coder"},
-{"ac", "arithmetic coder", 0, FF_OPT_TYPE_CONST, FF_CODER_TYPE_AC, INT_MIN, INT_MAX, V|E, "coder"},
-{"raw", "raw (no encoding)", 0, FF_OPT_TYPE_CONST, FF_CODER_TYPE_RAW, INT_MIN, INT_MAX, V|E, "coder"},
-{"rle", "run-length coder", 0, FF_OPT_TYPE_CONST, FF_CODER_TYPE_RLE, INT_MIN, INT_MAX, V|E, "coder"},
-{"deflate", "deflate-based coder", 0, FF_OPT_TYPE_CONST, FF_CODER_TYPE_DEFLATE, INT_MIN, INT_MAX, V|E, "coder"},
-{"context", "context model", OFFSET(context_model), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"slice_flags", NULL, OFFSET(slice_flags), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "mbd"},
-{"simple", "use mbcmp (default)", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_SIMPLE, INT_MIN, INT_MAX, V|E, "mbd"},
-{"bits", "use fewest bits", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_BITS, INT_MIN, INT_MAX, V|E, "mbd"},
-{"rd", "use best rate distortion", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_RD, INT_MIN, INT_MAX, V|E, "mbd"},
-{"stream_codec_tag", NULL, OFFSET(stream_codec_tag), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"sc_threshold", "scene change threshold", OFFSET(scenechange_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"lmin", "min lagrange factor (VBR)", OFFSET(lmin), FF_OPT_TYPE_INT,  2*FF_QP2LAMBDA, 0, INT_MAX, V|E},
-{"lmax", "max lagrange factor (VBR)", OFFSET(lmax), FF_OPT_TYPE_INT, 31*FF_QP2LAMBDA, 0, INT_MAX, V|E},
-{"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, CODEC_FLAG2_FASTPSKIP|CODEC_FLAG2_BIT_RESERVOIR|CODEC_FLAG2_PSY|CODEC_FLAG2_MBTREE, 0, UINT_MAX, V|A|E|D, "flags2"},
-{"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"dctmax", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"chroma", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"subq", "sub pel motion estimation quality", OFFSET(me_subpel_quality), FF_OPT_TYPE_INT, {.dbl = 8 }, INT_MIN, INT_MAX, V|E},
+{"dtg_active_format", NULL, OFFSET(dtg_active_format), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"ibias", "intra quant bias", OFFSET(intra_quant_bias), FF_OPT_TYPE_INT, {.dbl = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, V|E},
+{"pbias", "inter quant bias", OFFSET(inter_quant_bias), FF_OPT_TYPE_INT, {.dbl = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, V|E},
+{"color_table_id", NULL, OFFSET(color_table_id), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"internal_buffer_count", NULL, OFFSET(internal_buffer_count), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"global_quality", NULL, OFFSET(global_quality), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"coder", NULL, OFFSET(coder_type), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "coder"},
+{"vlc", "variable length coder / huffman coder", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CODER_TYPE_VLC }, INT_MIN, INT_MAX, V|E, "coder"},
+{"ac", "arithmetic coder", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CODER_TYPE_AC }, INT_MIN, INT_MAX, V|E, "coder"},
+{"raw", "raw (no encoding)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CODER_TYPE_RAW }, INT_MIN, INT_MAX, V|E, "coder"},
+{"rle", "run-length coder", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CODER_TYPE_RLE }, INT_MIN, INT_MAX, V|E, "coder"},
+{"deflate", "deflate-based coder", 0, FF_OPT_TYPE_CONST, {.dbl = FF_CODER_TYPE_DEFLATE }, INT_MIN, INT_MAX, V|E, "coder"},
+{"context", "context model", OFFSET(context_model), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"slice_flags", NULL, OFFSET(slice_flags), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"simple", "use mbcmp (default)", 0, FF_OPT_TYPE_CONST, {.dbl = FF_MB_DECISION_SIMPLE }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"bits", "use fewest bits", 0, FF_OPT_TYPE_CONST, {.dbl = FF_MB_DECISION_BITS }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"rd", "use best rate distortion", 0, FF_OPT_TYPE_CONST, {.dbl = FF_MB_DECISION_RD }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"stream_codec_tag", NULL, OFFSET(stream_codec_tag), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"sc_threshold", "scene change threshold", OFFSET(scenechange_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"lmin", "min lagrange factor (VBR)", OFFSET(lmin), FF_OPT_TYPE_INT, {.dbl =  2*FF_QP2LAMBDA }, 0, INT_MAX, V|E},
+{"lmax", "max lagrange factor (VBR)", OFFSET(lmax), FF_OPT_TYPE_INT, {.dbl = 31*FF_QP2LAMBDA }, 0, INT_MAX, V|E},
+{"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, {.dbl = CODEC_FLAG2_FASTPSKIP|CODEC_FLAG2_BIT_RESERVOIR|CODEC_FLAG2_PSY|CODEC_FLAG2_MBTREE }, 0, UINT_MAX, V|A|E|D, "flags2"},
+{"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #if FF_API_ANTIALIAS_ALGO
-{"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"},
+{"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|D, "aa"},
 #endif
-{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"},
-{"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FASTINT, INT_MIN, INT_MAX, V|D, "aa"},
-{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_INT, INT_MIN, INT_MAX, V|D, "aa"},
-{"float", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FLOAT, INT_MIN, INT_MAX, V|D, "aa"},
-{"qns", "quantizer noise shaping", OFFSET(quantizer_noise_shaping), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"threads", NULL, OFFSET(thread_count), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E|D},
-{"me_threshold", "motion estimaton threshold", OFFSET(me_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
-{"nssew", "nsse weight", OFFSET(nsse_weight), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E},
-{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
-{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
-{"profile", NULL, OFFSET(profile), FF_OPT_TYPE_INT, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
-{"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
-{"aac_main", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_AAC_MAIN, INT_MIN, INT_MAX, A|E, "profile"},
-{"aac_low", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_AAC_LOW, INT_MIN, INT_MAX, A|E, "profile"},
-{"aac_ssr", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_AAC_SSR, INT_MIN, INT_MAX, A|E, "profile"},
-{"aac_ltp", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_AAC_LTP, INT_MIN, INT_MAX, A|E, "profile"},
-{"dts", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS, INT_MIN, INT_MAX, A|E, "profile"},
-{"dts_es", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_ES, INT_MIN, INT_MAX, A|E, "profile"},
-{"dts_96_24", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_96_24, INT_MIN, INT_MAX, A|E, "profile"},
-{"dts_hd_hra", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_HD_HRA, INT_MIN, INT_MAX, A|E, "profile"},
-{"dts_hd_ma", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_HD_MA, INT_MIN, INT_MAX, A|E, "profile"},
-{"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"},
-{"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"},
-{"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|A|D},
-{"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), FF_OPT_TYPE_INT, FF_CMP_DCTMAX, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"border_mask", "increases the quantizer for macroblocks close to borders", OFFSET(border_masking), FF_OPT_TYPE_FLOAT, DEFAULT, -FLT_MAX, FLT_MAX, V|E},
-{"mblmin", "min macroblock lagrange factor (VBR)", OFFSET(mb_lmin), FF_OPT_TYPE_INT, FF_QP2LAMBDA * 2, 1, FF_LAMBDA_MAX, V|E},
-{"mblmax", "max macroblock lagrange factor (VBR)", OFFSET(mb_lmax), FF_OPT_TYPE_INT, FF_QP2LAMBDA * 31, 1, FF_LAMBDA_MAX, V|E},
-{"mepc", "motion estimation bitrate penalty compensation (1.0 = 256)", OFFSET(me_penalty_compensation), FF_OPT_TYPE_INT, 256, INT_MIN, INT_MAX, V|E},
-{"skip_loop_filter", NULL, OFFSET(skip_loop_filter), FF_OPT_TYPE_INT, AVDISCARD_DEFAULT, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"skip_idct"       , NULL, OFFSET(skip_idct)       , FF_OPT_TYPE_INT, AVDISCARD_DEFAULT, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"skip_frame"      , NULL, OFFSET(skip_frame)      , FF_OPT_TYPE_INT, AVDISCARD_DEFAULT, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"none"            , NULL, 0, FF_OPT_TYPE_CONST, AVDISCARD_NONE   , INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"default"         , NULL, 0, FF_OPT_TYPE_CONST, AVDISCARD_DEFAULT, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"noref"           , NULL, 0, FF_OPT_TYPE_CONST, AVDISCARD_NONREF , INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"bidir"           , NULL, 0, FF_OPT_TYPE_CONST, AVDISCARD_BIDIR  , INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"nokey"           , NULL, 0, FF_OPT_TYPE_CONST, AVDISCARD_NONKEY , INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"all"             , NULL, 0, FF_OPT_TYPE_CONST, AVDISCARD_ALL    , INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), FF_OPT_TYPE_INT, 1, 0, 4, V|E},
-{"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, DEFAULT, 0, 10, V|E},
-{"crf", "enables constant quality mode, and selects the quality (x264)", OFFSET(crf), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E},
-{"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E},
-{"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, 25, INT_MIN, INT_MAX, V|E},
-{"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E},
-{"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E},
-{"directpred", "direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal), 3 (auto)", OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E},
-{"bpyramid", "allows B-frames to be used as references for predicting", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"},
-{"wpred", "weighted biprediction for b-frames (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},
-{"mixed_refs", "one reference per partition, as opposed to one reference per macroblock", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"},
-{"dct8x8", "high profile 8x8 transform (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"},
-{"fastpskip", "fast pskip (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"},
-{"aud", "access unit delimiters (H.264)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"},
-{"skiprd", "RD optimal MB level residual skipping", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_SKIP_RD, INT_MIN, INT_MAX, V|E, "flags2"},
-{"complexityblur", "reduce fluctuations in qp (before curve compression)", OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E},
-{"deblockalpha", "in-loop deblocking filter alphac0 parameter", OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E},
-{"deblockbeta", "in-loop deblocking filter beta parameter", OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, -6, 6, V|E},
-{"partitions", "macroblock subpartition sizes to consider", OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"},
-{"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I4X4, INT_MIN, INT_MAX, V|E, "partitions"},
-{"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I8X8, INT_MIN, INT_MAX, V|E, "partitions"},
-{"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P4X4, INT_MIN, INT_MAX, V|E, "partitions"},
-{"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"},
-{"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"},
-{"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E},
-{"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, 256, 0, INT_MAX, V|E},
-{"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"},
-{"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E},
-{"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E},
-{"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"auto", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_AUTO }, INT_MIN, INT_MAX, V|D, "aa"},
+{"fastint", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FASTINT }, INT_MIN, INT_MAX, V|D, "aa"},
+{"int", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_INT }, INT_MIN, INT_MAX, V|D, "aa"},
+{"float", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_AA_FLOAT }, INT_MIN, INT_MAX, V|D, "aa"},
+{"qns", "quantizer noise shaping", OFFSET(quantizer_noise_shaping), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"threads", NULL, OFFSET(thread_count), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E|D},
+{"me_threshold", "motion estimaton threshold", OFFSET(me_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), FF_OPT_TYPE_INT, {.dbl = 0 }, INT_MIN, INT_MAX, V|E},
+{"nssew", "nsse weight", OFFSET(nsse_weight), FF_OPT_TYPE_INT, {.dbl = 8 }, INT_MIN, INT_MAX, V|E},
+{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|D},
+{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|D},
+{"profile", NULL, OFFSET(profile), FF_OPT_TYPE_INT, {.dbl = FF_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "profile"},
+{"unknown", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "profile"},
+{"aac_main", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_MAIN }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_low", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LOW }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_ssr", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_SSR }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_ltp", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LTP }, INT_MIN, INT_MAX, A|E, "profile"},
+{"dts", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS }, INT_MIN, INT_MAX, A|E, "profile"},
+{"dts_es", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_ES }, INT_MIN, INT_MAX, A|E, "profile"},
+{"dts_96_24", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_96_24 }, INT_MIN, INT_MAX, A|E, "profile"},
+{"dts_hd_hra", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_HD_HRA }, INT_MIN, INT_MAX, A|E, "profile"},
+{"dts_hd_ma", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_HD_MA }, INT_MIN, INT_MAX, A|E, "profile"},
+{"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, {.dbl = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
+{"unknown", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
+{"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, INT_MAX, V|A|D},
+{"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), FF_OPT_TYPE_INT, {.dbl = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"border_mask", "increases the quantizer for macroblocks close to borders", OFFSET(border_masking), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
+{"mblmin", "min macroblock lagrange factor (VBR)", OFFSET(mb_lmin), FF_OPT_TYPE_INT, {.dbl = FF_QP2LAMBDA * 2 }, 1, FF_LAMBDA_MAX, V|E},
+{"mblmax", "max macroblock lagrange factor (VBR)", OFFSET(mb_lmax), FF_OPT_TYPE_INT, {.dbl = FF_QP2LAMBDA * 31 }, 1, FF_LAMBDA_MAX, V|E},
+{"mepc", "motion estimation bitrate penalty compensation (1.0 = 256)", OFFSET(me_penalty_compensation), FF_OPT_TYPE_INT, {.dbl = 256 }, INT_MIN, INT_MAX, V|E},
+{"skip_loop_filter", NULL, OFFSET(skip_loop_filter), FF_OPT_TYPE_INT, {.dbl = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"skip_idct"       , NULL, OFFSET(skip_idct)       , FF_OPT_TYPE_INT, {.dbl = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"skip_frame"      , NULL, OFFSET(skip_frame)      , FF_OPT_TYPE_INT, {.dbl = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"none"            , NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AVDISCARD_NONE    }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"default"         , NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"noref"           , NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AVDISCARD_NONREF  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"bidir"           , NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AVDISCARD_BIDIR   }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"nokey"           , NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AVDISCARD_NONKEY  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"all"             , NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AVDISCARD_ALL     }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 4, V|E},
+{"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, 10, V|E},
+{"crf", "enables constant quality mode, and selects the quality (x264)", OFFSET(crf), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 51, V|E},
+{"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, V|E},
+{"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, {.dbl = 25 }, INT_MIN, INT_MAX, V|E},
+{"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E},
+{"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
+{"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+{"directpred", "direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal), 3 (auto)", OFFSET(directpred), FF_OPT_TYPE_INT, {.dbl = 2 }, INT_MIN, INT_MAX, V|E},
+{"bpyramid", "allows B-frames to be used as references for predicting", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_BPYRAMID }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"wpred", "weighted biprediction for b-frames (H.264)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_WPRED }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"mixed_refs", "one reference per partition, as opposed to one reference per macroblock", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_MIXED_REFS }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"dct8x8", "high profile 8x8 transform (H.264)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_8X8DCT }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"fastpskip", "fast pskip (H.264)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_FASTPSKIP }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"aud", "access unit delimiters (H.264)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_AUD }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"skiprd", "RD optimal MB level residual skipping", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_SKIP_RD }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"complexityblur", "reduce fluctuations in qp (before curve compression)", OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, {.dbl = 20.0 }, FLT_MIN, FLT_MAX, V|E},
+{"deblockalpha", "in-loop deblocking filter alphac0 parameter", OFFSET(deblockalpha), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, -6, 6, V|E},
+{"deblockbeta", "in-loop deblocking filter beta parameter", OFFSET(deblockbeta), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, -6, 6, V|E},
+{"partitions", "macroblock subpartition sizes to consider", OFFSET(partitions), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E, "partitions"},
+{"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = X264_PART_I4X4 }, INT_MIN, INT_MAX, V|E, "partitions"},
+{"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = X264_PART_I8X8 }, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = X264_PART_P4X4 }, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = X264_PART_P8X8 }, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = X264_PART_B8X8 }, INT_MIN, INT_MAX, V|E, "partitions"},
+{"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), FF_OPT_TYPE_INT, {.dbl = 6 }, 0, INT_MAX, V|E},
+{"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, {.dbl = 256 }, 0, INT_MAX, V|E},
+{"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_INTRA_VLC }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, {.dbl = 40 }, 1, INT_MAX, V|E},
+{"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, {.dbl = FF_COMPRESSION_DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+{"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+{"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
 #if FF_API_FLAC_GLOBAL_OPTS
-{"lpc_coeff_precision", "deprecated, use flac-specific options", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
-{"prediction_order_method", "deprecated, use flac-specific options", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"min_partition_order", "deprecated, use flac-specific options", OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
-{"max_partition_order", "deprecated, use flac-specific options", OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"lpc_coeff_precision", "deprecated, use flac-specific options", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, A|E},
+{"prediction_order_method", "deprecated, use flac-specific options", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+{"min_partition_order", "deprecated, use flac-specific options", OFFSET(min_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+{"max_partition_order", "deprecated, use flac-specific options", OFFSET(max_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
 #endif
-{"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT64, 0, 0, INT64_MAX, V|E},
-{"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_DROP_FRAME_TIMECODE, INT_MIN, INT_MAX, V|E, "flags2"},
-{"non_linear_q", "use non linear quantizer", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_NON_LINEAR_QUANT, INT_MIN, INT_MAX, V|E, "flags2"},
+{"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT64, {.dbl = 0 }, 0, INT64_MAX, V|E},
+{"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_DROP_FRAME_TIMECODE }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"non_linear_q", "use non linear quantizer", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_NON_LINEAR_QUANT }, INT_MIN, INT_MAX, V|E, "flags2"},
 #if FF_API_REQUEST_CHANNELS
-{"request_channels", "set desired number of audio channels", OFFSET(request_channels), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|D},
+{"request_channels", "set desired number of audio channels", OFFSET(request_channels), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, A|D},
 #endif
-{"drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), FF_OPT_TYPE_FLOAT, 1.0, 0.0, 1.0, A|D},
-{"reservoir", "use bit reservoir", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BIT_RESERVOIR, INT_MIN, INT_MAX, A|E, "flags2"},
-{"mbtree", "use macroblock tree ratecontrol (x264 only)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MBTREE, INT_MIN, INT_MAX, V|E, "flags2"},
-{"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"channel_layout", NULL, OFFSET(channel_layout), FF_OPT_TYPE_INT64, DEFAULT, 0, INT64_MAX, A|E|D, "channel_layout"},
-{"request_channel_layout", NULL, OFFSET(request_channel_layout), FF_OPT_TYPE_INT64, DEFAULT, 0, INT64_MAX, A|D, "request_channel_layout"},
-{"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), FF_OPT_TYPE_FLOAT, 1.0/3, 0.0, FLT_MAX, V|E},
-{"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use),  FF_OPT_TYPE_FLOAT, 3,     0.0, FLT_MAX, V|E},
-{"ticks_per_frame", NULL, OFFSET(ticks_per_frame), FF_OPT_TYPE_INT, 1, 1, INT_MAX, A|V|E|D},
-{"color_primaries", NULL, OFFSET(color_primaries), FF_OPT_TYPE_INT, AVCOL_PRI_UNSPECIFIED, 1, AVCOL_PRI_NB-1, V|E|D},
-{"color_trc", NULL, OFFSET(color_trc), FF_OPT_TYPE_INT, AVCOL_TRC_UNSPECIFIED, 1, AVCOL_TRC_NB-1, V|E|D},
-{"colorspace", NULL, OFFSET(colorspace), FF_OPT_TYPE_INT, AVCOL_SPC_UNSPECIFIED, 1, AVCOL_SPC_NB-1, V|E|D},
-{"color_range", NULL, OFFSET(color_range), FF_OPT_TYPE_INT, AVCOL_RANGE_UNSPECIFIED, 0, AVCOL_RANGE_NB-1, V|E|D},
-{"chroma_sample_location", NULL, OFFSET(chroma_sample_location), FF_OPT_TYPE_INT, AVCHROMA_LOC_UNSPECIFIED, 0, AVCHROMA_LOC_NB-1, V|E|D},
-{"psy", "use psycho visual optimization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_PSY, INT_MIN, INT_MAX, V|E, "flags2"},
-{"psy_rd", "specify psycho visual strength", OFFSET(psy_rd), FF_OPT_TYPE_FLOAT, 1.0, 0, FLT_MAX, V|E},
-{"psy_trellis", "specify psycho visual trellis", OFFSET(psy_trellis), FF_OPT_TYPE_FLOAT, 0, 0, FLT_MAX, V|E},
-{"aq_mode", "specify aq method", OFFSET(aq_mode), FF_OPT_TYPE_INT, 1, 0, INT_MAX, V|E},
-{"aq_strength", "specify aq strength", OFFSET(aq_strength), FF_OPT_TYPE_FLOAT, 1.0, 0, FLT_MAX, V|E},
-{"rc_lookahead", "specify number of frames to look ahead for frametype", OFFSET(rc_lookahead), FF_OPT_TYPE_INT, 40, 0, INT_MAX, V|E},
-{"ssim", "ssim will be calculated during encoding", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_SSIM, INT_MIN, INT_MAX, V|E, "flags2"},
-{"intra_refresh", "use periodic insertion of intra blocks instead of keyframes", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_REFRESH, INT_MIN, INT_MAX, V|E, "flags2"},
-{"crf_max", "in crf mode, prevents vbv from lowering quality beyond this point", OFFSET(crf_max), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E},
-{"log_level_offset", "set the log level offset", OFFSET(log_level_offset), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX },
+{"drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), FF_OPT_TYPE_FLOAT, {.dbl = 1.0 }, 0.0, 1.0, A|D},
+{"reservoir", "use bit reservoir", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_BIT_RESERVOIR }, INT_MIN, INT_MAX, A|E, "flags2"},
+{"mbtree", "use macroblock tree ratecontrol (x264 only)", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_MBTREE }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
+{"channel_layout", NULL, OFFSET(channel_layout), FF_OPT_TYPE_INT64, {.dbl = DEFAULT }, 0, INT64_MAX, A|E|D, "channel_layout"},
+{"request_channel_layout", NULL, OFFSET(request_channel_layout), FF_OPT_TYPE_INT64, {.dbl = DEFAULT }, 0, INT64_MAX, A|D, "request_channel_layout"},
+{"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), FF_OPT_TYPE_FLOAT, {.dbl = 1.0/3 }, 0.0, FLT_MAX, V|E},
+{"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use),  FF_OPT_TYPE_FLOAT, {.dbl = 3 },     0.0, FLT_MAX, V|E},
+{"ticks_per_frame", NULL, OFFSET(ticks_per_frame), FF_OPT_TYPE_INT, {.dbl = 1 }, 1, INT_MAX, A|V|E|D},
+{"color_primaries", NULL, OFFSET(color_primaries), FF_OPT_TYPE_INT, {.dbl = AVCOL_PRI_UNSPECIFIED }, 1, AVCOL_PRI_NB-1, V|E|D},
+{"color_trc", NULL, OFFSET(color_trc), FF_OPT_TYPE_INT, {.dbl = AVCOL_TRC_UNSPECIFIED }, 1, AVCOL_TRC_NB-1, V|E|D},
+{"colorspace", NULL, OFFSET(colorspace), FF_OPT_TYPE_INT, {.dbl = AVCOL_SPC_UNSPECIFIED }, 1, AVCOL_SPC_NB-1, V|E|D},
+{"color_range", NULL, OFFSET(color_range), FF_OPT_TYPE_INT, {.dbl = AVCOL_RANGE_UNSPECIFIED }, 0, AVCOL_RANGE_NB-1, V|E|D},
+{"chroma_sample_location", NULL, OFFSET(chroma_sample_location), FF_OPT_TYPE_INT, {.dbl = AVCHROMA_LOC_UNSPECIFIED }, 0, AVCHROMA_LOC_NB-1, V|E|D},
+{"psy", "use psycho visual optimization", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_PSY }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"psy_rd", "specify psycho visual strength", OFFSET(psy_rd), FF_OPT_TYPE_FLOAT, {.dbl = 1.0 }, 0, FLT_MAX, V|E},
+{"psy_trellis", "specify psycho visual trellis", OFFSET(psy_trellis), FF_OPT_TYPE_FLOAT, {.dbl = 0 }, 0, FLT_MAX, V|E},
+{"aq_mode", "specify aq method", OFFSET(aq_mode), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, INT_MAX, V|E},
+{"aq_strength", "specify aq strength", OFFSET(aq_strength), FF_OPT_TYPE_FLOAT, {.dbl = 1.0 }, 0, FLT_MAX, V|E},
+{"rc_lookahead", "specify number of frames to look ahead for frametype", OFFSET(rc_lookahead), FF_OPT_TYPE_INT, {.dbl = 40 }, 0, INT_MAX, V|E},
+{"ssim", "ssim will be calculated during encoding", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_SSIM }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"intra_refresh", "use periodic insertion of intra blocks instead of keyframes", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_INTRA_REFRESH }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"crf_max", "in crf mode, prevents vbv from lowering quality beyond this point", OFFSET(crf_max), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 51, V|E},
+{"log_level_offset", "set the log level offset", OFFSET(log_level_offset), FF_OPT_TYPE_INT, {.dbl = 0 }, INT_MIN, INT_MAX },
 #if FF_API_FLAC_GLOBAL_OPTS
-{"lpc_type", "deprecated, use flac-specific options", OFFSET(lpc_type), FF_OPT_TYPE_INT, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E},
-{"none",     NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_NONE,     INT_MIN, INT_MAX, A|E, "lpc_type"},
-{"fixed",    NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_FIXED,    INT_MIN, INT_MAX, A|E, "lpc_type"},
-{"levinson", NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_LEVINSON, INT_MIN, INT_MAX, A|E, "lpc_type"},
-{"cholesky", NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, A|E, "lpc_type"},
-{"lpc_passes", "deprecated, use flac-specific options", OFFSET(lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"lpc_type", "deprecated, use flac-specific options", OFFSET(lpc_type), FF_OPT_TYPE_INT, {.dbl = AV_LPC_TYPE_DEFAULT }, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E},
+{"none",     NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_NONE },     INT_MIN, INT_MAX, A|E, "lpc_type"},
+{"fixed",    NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_FIXED },    INT_MIN, INT_MAX, A|E, "lpc_type"},
+{"levinson", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_LEVINSON }, INT_MIN, INT_MAX, A|E, "lpc_type"},
+{"cholesky", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_CHOLESKY }, INT_MIN, INT_MAX, A|E, "lpc_type"},
+{"lpc_passes", "deprecated, use flac-specific options", OFFSET(lpc_passes), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
 #endif
-{"slices", "number of slices, used in parallelized decoding", OFFSET(slices), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E},
-{"thread_type", "select multithreading type", OFFSET(thread_type), FF_OPT_TYPE_INT, FF_THREAD_SLICE|FF_THREAD_FRAME, 0, INT_MAX, V|E|D, "thread_type"},
-{"slice", NULL, 0, FF_OPT_TYPE_CONST, FF_THREAD_SLICE, INT_MIN, INT_MAX, V|E|D, "thread_type"},
-{"frame", NULL, 0, FF_OPT_TYPE_CONST, FF_THREAD_FRAME, INT_MIN, INT_MAX, V|E|D, "thread_type"},
-{"vbv_delay", "initial buffer fill time in periods of 27Mhz clock", 0, FF_OPT_TYPE_INT64, 0, 0, INT64_MAX},
-{"audio_service_type", "audio service type", OFFSET(audio_service_type), FF_OPT_TYPE_INT, AV_AUDIO_SERVICE_TYPE_MAIN, 0, AV_AUDIO_SERVICE_TYPE_NB-1, A|E, "audio_service_type"},
-{"ma", "Main Audio Service", 0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_MAIN,              INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"ef", "Effects",            0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_EFFECTS,           INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"vi", "Visually Impaired",  0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED, INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"hi", "Hearing Impaired",   0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED,  INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"di", "Dialogue",           0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_DIALOGUE,          INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"co", "Commentary",         0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_COMMENTARY,        INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"em", "Emergency",          0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_EMERGENCY,         INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"vo", "Voice Over",         0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_VOICE_OVER,        INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"ka", "Karaoke",            0, FF_OPT_TYPE_CONST, AV_AUDIO_SERVICE_TYPE_KARAOKE,           INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"slices", "number of slices, used in parallelized decoding", OFFSET(slices), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, INT_MAX, V|E},
+{"thread_type", "select multithreading type", OFFSET(thread_type), FF_OPT_TYPE_INT, {.dbl = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|E|D, "thread_type"},
+{"slice", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_THREAD_SLICE }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
+{"frame", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_THREAD_FRAME }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
+{"vbv_delay", "initial buffer fill time in periods of 27Mhz clock", 0, FF_OPT_TYPE_INT64, {.dbl = 0 }, 0, INT64_MAX},
+{"audio_service_type", "audio service type", OFFSET(audio_service_type), FF_OPT_TYPE_INT, {.dbl = AV_AUDIO_SERVICE_TYPE_MAIN }, 0, AV_AUDIO_SERVICE_TYPE_NB-1, A|E, "audio_service_type"},
+{"ma", "Main Audio Service", 0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_MAIN },              INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"ef", "Effects",            0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_EFFECTS },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"vi", "Visually Impaired",  0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"hi", "Hearing Impaired",   0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED },  INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"di", "Dialogue",           0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_DIALOGUE },          INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"co", "Commentary",         0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_COMMENTARY },        INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"em", "Emergency",          0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_EMERGENCY },         INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"vo", "Voice Over",         0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_VOICE_OVER },        INT_MIN, INT_MAX, A|E, "audio_service_type"},
+{"ka", "Karaoke",            0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_KARAOKE },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {NULL},
 };
 
diff --git a/libavfilter/vsrc_movie.c b/libavfilter/vsrc_movie.c
index 88220bc4fd..5e15524e61 100644
--- a/libavfilter/vsrc_movie.c
+++ b/libavfilter/vsrc_movie.c
@@ -57,12 +57,12 @@ typedef struct {
 #define OFFSET(x) offsetof(MovieContext, x)
 
 static const AVOption movie_options[]= {
-{"format_name",  "set format name",         OFFSET(format_name),  FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"f",            "set format name",         OFFSET(format_name),  FF_OPT_TYPE_STRING, 0,  CHAR_MIN, CHAR_MAX },
-{"stream_index", "set stream index",        OFFSET(stream_index), FF_OPT_TYPE_INT,   -1,  -1,       INT_MAX  },
-{"si",           "set stream index",        OFFSET(stream_index), FF_OPT_TYPE_INT,   -1,  -1,       INT_MAX  },
-{"seek_point",   "set seekpoint (seconds)", OFFSET(seek_point_d), FF_OPT_TYPE_DOUBLE, 0,  0,        (INT64_MAX-1) / 1000000 },
-{"sp",           "set seekpoint (seconds)", OFFSET(seek_point_d), FF_OPT_TYPE_DOUBLE, 0,  0,        (INT64_MAX-1) / 1000000 },
+{"format_name",  "set format name",         OFFSET(format_name),  FF_OPT_TYPE_STRING, {.str =  0},  CHAR_MIN, CHAR_MAX },
+{"f",            "set format name",         OFFSET(format_name),  FF_OPT_TYPE_STRING, {.str =  0},  CHAR_MIN, CHAR_MAX },
+{"stream_index", "set stream index",        OFFSET(stream_index), FF_OPT_TYPE_INT,    {.dbl = -1},  -1,       INT_MAX  },
+{"si",           "set stream index",        OFFSET(stream_index), FF_OPT_TYPE_INT,    {.dbl = -1},  -1,       INT_MAX  },
+{"seek_point",   "set seekpoint (seconds)", OFFSET(seek_point_d), FF_OPT_TYPE_DOUBLE, {.dbl =  0},  0,        (INT64_MAX-1) / 1000000 },
+{"sp",           "set seekpoint (seconds)", OFFSET(seek_point_d), FF_OPT_TYPE_DOUBLE, {.dbl =  0},  0,        (INT64_MAX-1) / 1000000 },
 {NULL},
 };
 
diff --git a/libavformat/http.c b/libavformat/http.c
index 7d59152409..ff8f2405eb 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -54,7 +54,7 @@ typedef struct {
 
 #define OFFSET(x) offsetof(HTTPContext, x)
 static const AVOption options[] = {
-{"chunksize", "use chunked transfer-encoding for posts, -1 disables it, 0 enables it", OFFSET(chunksize), FF_OPT_TYPE_INT64, 0, -1, 0 }, /* Default to 0, for chunked POSTs */
+{"chunksize", "use chunked transfer-encoding for posts, -1 disables it, 0 enables it", OFFSET(chunksize), FF_OPT_TYPE_INT64, {.dbl = 0}, -1, 0 }, /* Default to 0, for chunked POSTs */
 {NULL}
 };
 static const AVClass httpcontext_class = {
diff --git a/libavformat/mp3enc.c b/libavformat/mp3enc.c
index 10abe0994c..2337837ef1 100644
--- a/libavformat/mp3enc.c
+++ b/libavformat/mp3enc.c
@@ -162,7 +162,7 @@ typedef struct MP3Context {
 
 static const AVOption options[] = {
     { "id3v2_version", "Select ID3v2 version to write. Currently 3 and 4 are supported.",
-      offsetof(MP3Context, id3v2_version), FF_OPT_TYPE_INT, 4, 3, 4, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MP3Context, id3v2_version), FF_OPT_TYPE_INT, {.dbl = 4}, 3, 4, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 55a16f7f0e..8fa8a56c90 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -76,15 +76,15 @@ typedef struct MpegTSWrite {
 
 static const AVOption options[] = {
     { "mpegts_transport_stream_id", "Set transport_stream_id field.",
-      offsetof(MpegTSWrite, transport_stream_id), FF_OPT_TYPE_INT, 0x0001, 0x0001, 0xffff, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MpegTSWrite, transport_stream_id), FF_OPT_TYPE_INT, {.dbl = 0x0001 }, 0x0001, 0xffff, AV_OPT_FLAG_ENCODING_PARAM},
     { "mpegts_original_network_id", "Set original_network_id field.",
-      offsetof(MpegTSWrite, original_network_id), FF_OPT_TYPE_INT, 0x0001, 0x0001, 0xffff, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MpegTSWrite, original_network_id), FF_OPT_TYPE_INT, {.dbl = 0x0001 }, 0x0001, 0xffff, AV_OPT_FLAG_ENCODING_PARAM},
     { "mpegts_service_id", "Set service_id field.",
-      offsetof(MpegTSWrite, service_id), FF_OPT_TYPE_INT, 0x0001, 0x0001, 0xffff, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MpegTSWrite, service_id), FF_OPT_TYPE_INT, {.dbl = 0x0001 }, 0x0001, 0xffff, AV_OPT_FLAG_ENCODING_PARAM},
     { "mpegts_pmt_start_pid", "Set the first pid of the PMT.",
-      offsetof(MpegTSWrite, pmt_start_pid), FF_OPT_TYPE_INT, 0x1000, 0x1000, 0x1f00, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MpegTSWrite, pmt_start_pid), FF_OPT_TYPE_INT, {.dbl = 0x1000 }, 0x1000, 0x1f00, AV_OPT_FLAG_ENCODING_PARAM},
     { "mpegts_start_pid", "Set the first pid.",
-      offsetof(MpegTSWrite, start_pid), FF_OPT_TYPE_INT, 0x0100, 0x0100, 0x0f00, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MpegTSWrite, start_pid), FF_OPT_TYPE_INT, {.dbl = 0x0100 }, 0x0100, 0x0f00, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
diff --git a/libavformat/options.c b/libavformat/options.c
index 2192ff6575..bdf4796e49 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -40,23 +40,23 @@ static const char* format_to_name(void* ptr)
 #define D AV_OPT_FLAG_DECODING_PARAM
 
 static const AVOption options[]={
-{"probesize", "set probing size", OFFSET(probesize), FF_OPT_TYPE_INT, 5000000, 32, INT_MAX, D},
-{"muxrate", "set mux rate", OFFSET(mux_rate), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, E},
-{"packetsize", "set packet size", OFFSET(packet_size), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, E},
-{"fflags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, D|E, "fflags"},
-{"ignidx", "ignore index", 0, FF_OPT_TYPE_CONST, AVFMT_FLAG_IGNIDX, INT_MIN, INT_MAX, D, "fflags"},
-{"genpts", "generate pts", 0, FF_OPT_TYPE_CONST, AVFMT_FLAG_GENPTS, INT_MIN, INT_MAX, D, "fflags"},
-{"nofillin", "do not fill in missing values that can be exactly calculated", 0, FF_OPT_TYPE_CONST, AVFMT_FLAG_NOFILLIN, INT_MIN, INT_MAX, D, "fflags"},
-{"noparse", "disable AVParsers, this needs nofillin too", 0, FF_OPT_TYPE_CONST, AVFMT_FLAG_NOPARSE, INT_MIN, INT_MAX, D, "fflags"},
-{"igndts", "ignore dts", 0, FF_OPT_TYPE_CONST, AVFMT_FLAG_IGNDTS, INT_MIN, INT_MAX, D, "fflags"},
-{"rtphint", "add rtp hinting", 0, FF_OPT_TYPE_CONST, AVFMT_FLAG_RTP_HINT, INT_MIN, INT_MAX, E, "fflags"},
-{"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, 5*AV_TIME_BASE, 0, INT_MAX, D},
-{"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, 0, 0, 0, D},
-{"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), FF_OPT_TYPE_INT, 1<<20, 0, INT_MAX, D},
-{"rtbufsize", "max memory used for buffering real-time frames", OFFSET(max_picture_buffer), FF_OPT_TYPE_INT, 3041280, 0, INT_MAX, D}, /* defaults to 1s of 15fps 352x288 YUYV422 video */
-{"fdebug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, DEFAULT, 0, INT_MAX, E|D, "fdebug"},
-{"ts", NULL, 0, FF_OPT_TYPE_CONST, FF_FDEBUG_TS, INT_MIN, INT_MAX, E|D, "fdebug"},
-{"max_delay", "maximum muxing or demuxing delay in microseconds", OFFSET(max_delay), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, E|D},
+{"probesize", "set probing size", OFFSET(probesize), FF_OPT_TYPE_INT, {.dbl = 5000000 }, 32, INT_MAX, D},
+{"muxrate", "set mux rate", OFFSET(mux_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, E},
+{"packetsize", "set packet size", OFFSET(packet_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, E},
+{"fflags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, INT_MIN, INT_MAX, D|E, "fflags"},
+{"ignidx", "ignore index", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_IGNIDX }, INT_MIN, INT_MAX, D, "fflags"},
+{"genpts", "generate pts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_GENPTS }, INT_MIN, INT_MAX, D, "fflags"},
+{"nofillin", "do not fill in missing values that can be exactly calculated", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_NOFILLIN }, INT_MIN, INT_MAX, D, "fflags"},
+{"noparse", "disable AVParsers, this needs nofillin too", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_NOPARSE }, INT_MIN, INT_MAX, D, "fflags"},
+{"igndts", "ignore dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_IGNDTS }, INT_MIN, INT_MAX, D, "fflags"},
+{"rtphint", "add rtp hinting", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"},
+{"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, {.dbl = 5*AV_TIME_BASE }, 0, INT_MAX, D},
+{"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
+{"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), FF_OPT_TYPE_INT, {.dbl = 1<<20 }, 0, INT_MAX, D},
+{"rtbufsize", "max memory used for buffering real-time frames", OFFSET(max_picture_buffer), FF_OPT_TYPE_INT, {.dbl = 3041280 }, 0, INT_MAX, D}, /* defaults to 1s of 15fps 352x288 YUYV422 video */
+{"fdebug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, INT_MAX, E|D, "fdebug"},
+{"ts", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_FDEBUG_TS }, INT_MIN, INT_MAX, E|D, "fdebug"},
+{"max_delay", "maximum muxing or demuxing delay in microseconds", OFFSET(max_delay), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, E|D},
 {NULL},
 };
 
diff --git a/libavformat/spdifenc.c b/libavformat/spdifenc.c
index 35c7b162fb..24c2f15754 100644
--- a/libavformat/spdifenc.c
+++ b/libavformat/spdifenc.c
@@ -86,10 +86,10 @@ typedef struct IEC61937Context {
 } IEC61937Context;
 
 static const AVOption options[] = {
-{ "spdif_flags", "IEC 61937 encapsulation flags", offsetof(IEC61937Context, spdif_flags), FF_OPT_TYPE_FLAGS, 0, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "spdif_flags" },
-{ "be", "output in big-endian format (for use as s16be)", 0, FF_OPT_TYPE_CONST, SPDIF_FLAG_BIGENDIAN, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "spdif_flags" },
-{ "dtshd_rate", "mux complete DTS frames in HD mode at the specified IEC958 rate (in Hz, default 0=disabled)", offsetof(IEC61937Context, dtshd_rate), FF_OPT_TYPE_INT, 0, 0, 768000, AV_OPT_FLAG_ENCODING_PARAM },
-{ "dtshd_fallback_time", "min secs to strip HD for after an overflow (-1: till the end, default 60)", offsetof(IEC61937Context, dtshd_fallback), FF_OPT_TYPE_INT, 60, -1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
+{ "spdif_flags", "IEC 61937 encapsulation flags", offsetof(IEC61937Context, spdif_flags), FF_OPT_TYPE_FLAGS, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "spdif_flags" },
+{ "be", "output in big-endian format (for use as s16be)", 0, FF_OPT_TYPE_CONST, {.dbl = SPDIF_FLAG_BIGENDIAN},  0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "spdif_flags" },
+{ "dtshd_rate", "mux complete DTS frames in HD mode at the specified IEC958 rate (in Hz, default 0=disabled)", offsetof(IEC61937Context, dtshd_rate), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 768000, AV_OPT_FLAG_ENCODING_PARAM },
+{ "dtshd_fallback_time", "min secs to strip HD for after an overflow (-1: till the end, default 60)", offsetof(IEC61937Context, dtshd_fallback), FF_OPT_TYPE_INT, {.dbl = 60}, -1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
 { NULL },
 };
 
diff --git a/libavutil/opt.c b/libavutil/opt.c
index f08ed8f068..57e3248a74 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -164,8 +164,8 @@ int av_set_string3(void *obj, const char *name, const char *val, int alloc, cons
             {
                 const AVOption *o_named= av_find_opt(obj, buf, o->unit, 0, 0);
                 if (o_named && o_named->type == FF_OPT_TYPE_CONST)
-                    d= o_named->default_val;
-                else if (!strcmp(buf, "default")) d= o->default_val;
+                    d= o_named->default_val.dbl;
+                else if (!strcmp(buf, "default")) d= o->default_val.dbl;
                 else if (!strcmp(buf, "max"    )) d= o->max;
                 else if (!strcmp(buf, "min"    )) d= o->min;
                 else if (!strcmp(buf, "none"   )) d= 0;
@@ -417,25 +417,25 @@ void av_opt_set_defaults2(void *s, int mask, int flags)
             case FF_OPT_TYPE_FLAGS:
             case FF_OPT_TYPE_INT: {
                 int val;
-                val = opt->default_val;
+                val = opt->default_val.dbl;
                 av_set_int(s, opt->name, val);
             }
             break;
             case FF_OPT_TYPE_INT64:
-                if ((double)(opt->default_val+0.6) == opt->default_val)
+                if ((double)(opt->default_val.dbl+0.6) == opt->default_val.dbl)
                     av_log(s, AV_LOG_DEBUG, "loss of precision in default of %s\n", opt->name);
-                av_set_int(s, opt->name, opt->default_val);
+                av_set_int(s, opt->name, opt->default_val.dbl);
             break;
             case FF_OPT_TYPE_DOUBLE:
             case FF_OPT_TYPE_FLOAT: {
                 double val;
-                val = opt->default_val;
+                val = opt->default_val.dbl;
                 av_set_double(s, opt->name, val);
             }
             break;
             case FF_OPT_TYPE_RATIONAL: {
                 AVRational val;
-                val = av_d2q(opt->default_val, INT_MAX);
+                val = av_d2q(opt->default_val.dbl, INT_MAX);
                 av_set_q(s, opt->name, val);
             }
             break;
diff --git a/libavutil/opt.h b/libavutil/opt.h
index 3e61737dd4..6668139fec 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h
@@ -61,72 +61,26 @@ typedef struct AVOption {
     int offset;
     enum AVOptionType type;
 
-    /**
-     * the default value for scalar options
-     */
-    double default_val;
-    double min;                 ///< minimum valid value for the option
-    double max;                 ///< maximum valid value for the option
-
-    int flags;
-#define AV_OPT_FLAG_ENCODING_PARAM  1   ///< a generic parameter which can be set by the user for muxing or encoding
-#define AV_OPT_FLAG_DECODING_PARAM  2   ///< a generic parameter which can be set by the user for demuxing or decoding
-#define AV_OPT_FLAG_METADATA        4   ///< some data extracted or inserted into the file like title, comment, ...
-#define AV_OPT_FLAG_AUDIO_PARAM     8
-#define AV_OPT_FLAG_VIDEO_PARAM     16
-#define AV_OPT_FLAG_SUBTITLE_PARAM  32
-//FIXME think about enc-audio, ... style flags
-
-    /**
-     * The logical unit to which the option belongs. Non-constant
-     * options and corresponding named constants share the same
-     * unit. May be NULL.
-     */
-    const char *unit;
-} AVOption;
-
-/**
- * AVOption2.
- * THIS IS NOT PART OF THE API/ABI YET!
- * This is identical to AVOption except that default_val was replaced by
- * an union, it should be compatible with AVOption on normal platforms.
- */
-typedef struct AVOption2 {
-    const char *name;
-
-    /**
-     * short English help text
-     * @todo What about other languages?
-     */
-    const char *help;
-
-    /**
-     * The offset relative to the context structure where the option
-     * value is stored. It should be 0 for named constants.
-     */
-    int offset;
-    enum AVOptionType type;
-
     /**
      * the default value for scalar options
      */
     union {
         double dbl;
         const char *str;
+        /* TODO those are unused now */
+        int64_t i64;
+        AVRational q;
     } default_val;
-
     double min;                 ///< minimum valid value for the option
     double max;                 ///< maximum valid value for the option
 
     int flags;
-/*
 #define AV_OPT_FLAG_ENCODING_PARAM  1   ///< a generic parameter which can be set by the user for muxing or encoding
 #define AV_OPT_FLAG_DECODING_PARAM  2   ///< a generic parameter which can be set by the user for demuxing or decoding
 #define AV_OPT_FLAG_METADATA        4   ///< some data extracted or inserted into the file like title, comment, ...
 #define AV_OPT_FLAG_AUDIO_PARAM     8
 #define AV_OPT_FLAG_VIDEO_PARAM     16
 #define AV_OPT_FLAG_SUBTITLE_PARAM  32
-*/
 //FIXME think about enc-audio, ... style flags
 
     /**
@@ -135,8 +89,7 @@ typedef struct AVOption2 {
      * unit. May be NULL.
      */
     const char *unit;
-} AVOption2;
-
+} AVOption;
 
 /**
  * Look for an option in obj. Look only for the options which
diff --git a/libswscale/options.c b/libswscale/options.c
index 7cee46076f..f80735b261 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -34,40 +34,40 @@ static const char * sws_context_to_name(void * ptr)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
 static const AVOption options[] = {
-    { "sws_flags", "scaler/cpu flags", OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, 0, UINT_MAX, VE, "sws_flags" },
-    { "fast_bilinear", "fast bilinear", 0, FF_OPT_TYPE_CONST, SWS_FAST_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bilinear", "bilinear", 0, FF_OPT_TYPE_CONST, SWS_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bicubic", "bicubic", 0, FF_OPT_TYPE_CONST, SWS_BICUBIC, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "experimental", "experimental", 0, FF_OPT_TYPE_CONST, SWS_X, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "neighbor", "nearest neighbor", 0, FF_OPT_TYPE_CONST, SWS_POINT, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "area", "averaging area", 0, FF_OPT_TYPE_CONST, SWS_AREA, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bicublin", "luma bicubic, chroma bilinear", 0, FF_OPT_TYPE_CONST, SWS_BICUBLIN, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "gauss", "gaussian", 0, FF_OPT_TYPE_CONST, SWS_GAUSS, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "sinc", "sinc", 0, FF_OPT_TYPE_CONST, SWS_SINC, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "lanczos", "lanczos", 0, FF_OPT_TYPE_CONST, SWS_LANCZOS, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_SSE2, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "sws_flags", "scaler/cpu flags", OFFSET(flags), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, UINT_MAX, VE, "sws_flags" },
+    { "fast_bilinear", "fast bilinear", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_FAST_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bilinear", "bilinear", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bicubic", "bicubic", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_BICUBIC }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "experimental", "experimental", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_X }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "neighbor", "nearest neighbor", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_POINT }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "area", "averaging area", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_AREA }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bicublin", "luma bicubic, chroma bilinear", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_BICUBLIN }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "gauss", "gaussian", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_GAUSS }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "sinc", "sinc", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_SINC }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "lanczos", "lanczos", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_LANCZOS }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "print_info", "print info", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_SSE2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_3DNOW }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_ALTIVEC }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_BFIN }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "bitexact", "", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" },
 
-    { "srcw", "source width"      , OFFSET(srcW), FF_OPT_TYPE_INT, 16, 1, INT_MAX, VE },
-    { "srch", "source height"     , OFFSET(srcH), FF_OPT_TYPE_INT, 16, 1, INT_MAX, VE },
-    { "dstw", "destination width" , OFFSET(dstW), FF_OPT_TYPE_INT, 16, 1, INT_MAX, VE },
-    { "dsth", "destination height", OFFSET(dstH), FF_OPT_TYPE_INT, 16, 1, INT_MAX, VE },
-    { "src_format", "source format"     , OFFSET(srcFormat), FF_OPT_TYPE_INT, DEFAULT, 0, PIX_FMT_NB-1, VE },
-    { "dst_format", "destination format", OFFSET(dstFormat), FF_OPT_TYPE_INT, DEFAULT, 0, PIX_FMT_NB-1, VE },
-    { "src_range" , "source range"      , OFFSET(srcRange) , FF_OPT_TYPE_INT, DEFAULT, 0, 1, VE },
-    { "dst_range" , "destination range" , OFFSET(dstRange) , FF_OPT_TYPE_INT, DEFAULT, 0, 1, VE },
-    { "param0" , "scaler param 0" , OFFSET(param[0]) , FF_OPT_TYPE_DOUBLE, SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
-    { "param1" , "scaler param 1" , OFFSET(param[1]) , FF_OPT_TYPE_DOUBLE, SWS_PARAM_DEFAULT, INT_MIN, INT_MAX, VE },
+    { "srcw", "source width"      , OFFSET(srcW), FF_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
+    { "srch", "source height"     , OFFSET(srcH), FF_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
+    { "dstw", "destination width" , OFFSET(dstW), FF_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
+    { "dsth", "destination height", OFFSET(dstH), FF_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
+    { "src_format", "source format"     , OFFSET(srcFormat), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, PIX_FMT_NB-1, VE },
+    { "dst_format", "destination format", OFFSET(dstFormat), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, PIX_FMT_NB-1, VE },
+    { "src_range" , "source range"      , OFFSET(srcRange) , FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, 1, VE },
+    { "dst_range" , "destination range" , OFFSET(dstRange) , FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, 1, VE },
+    { "param0" , "scaler param 0" , OFFSET(param[0]) , FF_OPT_TYPE_DOUBLE, {.dbl = SWS_PARAM_DEFAULT}, INT_MIN, INT_MAX, VE },
+    { "param1" , "scaler param 1" , OFFSET(param[1]) , FF_OPT_TYPE_DOUBLE, {.dbl = SWS_PARAM_DEFAULT}, INT_MIN, INT_MAX, VE },
 
     { NULL }
 };
-- 
cgit v1.2.3