summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/APIchanges6
-rw-r--r--ffmpeg.c16
-rw-r--r--libavcodec/alacenc.c4
-rw-r--r--libavcodec/alpha/dsputil_alpha.c4
-rw-r--r--libavcodec/arm/dsputil_init_arm.c4
-rw-r--r--libavcodec/arm/dsputil_init_armv6.c4
-rw-r--r--libavcodec/arm/dsputil_init_neon.c6
-rw-r--r--libavcodec/arm/dsputil_iwmmxt.c4
-rw-r--r--libavcodec/arm/h264pred_init_arm.c2
-rw-r--r--libavcodec/avcodec.h35
-rw-r--r--libavcodec/bfin/dsputil_bfin.c6
-rw-r--r--libavcodec/dsputil.c6
-rw-r--r--libavcodec/dsputil_template.c1391
-rw-r--r--libavcodec/flacenc.c102
-rw-r--r--libavcodec/h264.c135
-rw-r--r--libavcodec/h264.h10
-rw-r--r--libavcodec/h264_cabac.c26
-rw-r--r--libavcodec/h264_cavlc.c11
-rw-r--r--libavcodec/h264_loopfilter.c224
-rw-r--r--libavcodec/h264_ps.c2
-rw-r--r--libavcodec/h264_refs.c5
-rw-r--r--libavcodec/h264dsp.c104
-rw-r--r--libavcodec/h264dsp.h4
-rw-r--r--libavcodec/h264dsp_template.c313
-rw-r--r--libavcodec/h264idct.c6
-rw-r--r--libavcodec/h264idct_template.c291
-rw-r--r--libavcodec/h264pred.c364
-rw-r--r--libavcodec/h264pred_template.c975
-rw-r--r--libavcodec/high_bit_depth.h85
-rw-r--r--libavcodec/lpc.c12
-rw-r--r--libavcodec/lpc.h17
-rw-r--r--libavcodec/mlib/dsputil_mlib.c4
-rw-r--r--libavcodec/options.c20
-rw-r--r--libavcodec/ppc/dsputil_altivec.c6
-rw-r--r--libavcodec/ppc/dsputil_ppc.c4
-rw-r--r--libavcodec/ppc/h264_altivec.c4
-rw-r--r--libavcodec/ps2/dsputil_mmi.c4
-rw-r--r--libavcodec/ra144enc.c4
-rw-r--r--libavcodec/sh4/dsputil_align.c8
-rw-r--r--libavcodec/sh4/dsputil_sh4.c4
-rw-r--r--libavcodec/sparc/dsputil_vis.c4
-rw-r--r--libavcodec/utils.c6
-rw-r--r--libavcodec/version.h3
-rw-r--r--libavcodec/x86/dsputil_mmx.c36
-rw-r--r--libavformat/utils.c10
-rw-r--r--libavutil/opt.h3
-rw-r--r--libavutil/pixfmt.h2
-rw-r--r--libswscale/utils.c4
-rw-r--r--tests/fate/h264.mak14
-rw-r--r--tests/ref/fate/h264-conformance-frext-pph10i1_panasonic_a10
-rw-r--r--tests/ref/fate/h264-conformance-frext-pph10i2_panasonic_a10
-rw-r--r--tests/ref/fate/h264-conformance-frext-pph10i3_panasonic_a10
-rw-r--r--tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a19
-rw-r--r--tests/ref/fate/h264-conformance-frext-pph10i5_panasonic_a10
-rw-r--r--tests/ref/fate/h264-conformance-frext-pph10i6_panasonic_a10
-rw-r--r--tests/ref/fate/h264-conformance-frext-pph10i7_panasonic_a10
56 files changed, 3934 insertions, 459 deletions
diff --git a/doc/APIchanges b/doc/APIchanges
index 1f5e8aad39..aa2827af08 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,12 @@ libavutil: 2011-04-18
API changes, most recent first:
+2011-05-10 - xxxxxxx - lavc 53.3.0 - avcodec.h
+ Deprecate AVLPCType and the following fields in
+ AVCodecContext: lpc_coeff_precision, prediction_order_method,
+ min_partition_order, max_partition_order, lpc_type, lpc_passes.
+ Corresponding FLAC encoder options should be used instead.
+
2011-05-07 - xxxxxxx - lavfi 2.5.0 - avcodec.h
Add libavfilter/avcodec.h header and avfilter_copy_frame_props()
function.
diff --git a/ffmpeg.c b/ffmpeg.c
index 74461d368f..2875d8aec8 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -292,7 +292,6 @@ typedef struct AVOutputStream {
int resample_pix_fmt;
float frame_aspect_ratio;
-
/* forced key frames */
int64_t *forced_kf_pts;
int forced_kf_count;
@@ -1505,7 +1504,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
AVFormatContext *os;
AVOutputStream *ost;
int ret, i;
- int got_picture;
+ int got_output;
AVFrame picture;
void *buffer_to_free = NULL;
static unsigned int samples_size= 0;
@@ -1537,7 +1536,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
pkt_pts = av_rescale_q(pkt->pts, ist->st->time_base, AV_TIME_BASE_Q);
//while we have more to decode or while the decoder did output something on EOF
- while (avpkt.size > 0 || (!pkt && ist->next_pts != ist->pts)) {
+ while (avpkt.size > 0 || (!pkt && got_output)) {
uint8_t *data_buf, *decoded_data_buf;
int data_size, decoded_data_size;
handle_eof:
@@ -1573,9 +1572,10 @@ static int output_packet(AVInputStream *ist, int ist_index,
avpkt.data += ret;
avpkt.size -= ret;
data_size = ret;
+ got_output = decoded_data_size > 0;
/* Some bug in mpeg audio decoder gives */
/* decoded_data_size < 0, it seems they are overflows */
- if (decoded_data_size <= 0) {
+ if (!got_output) {
/* no audio frame */
continue;
}
@@ -1592,11 +1592,11 @@ static int output_packet(AVInputStream *ist, int ist_index,
pkt_pts = AV_NOPTS_VALUE;
ret = avcodec_decode_video2(ist->st->codec,
- &picture, &got_picture, &avpkt);
+ &picture, &got_output, &avpkt);
ist->st->quality= picture.quality;
if (ret < 0)
goto fail_decode;
- if (!got_picture) {
+ if (!got_output) {
/* no picture yet */
goto discard_packet;
}
@@ -1613,10 +1613,10 @@ static int output_packet(AVInputStream *ist, int ist_index,
break;
case AVMEDIA_TYPE_SUBTITLE:
ret = avcodec_decode_subtitle2(ist->st->codec,
- &subtitle, &got_picture, &avpkt);
+ &subtitle, &got_output, &avpkt);
if (ret < 0)
goto fail_decode;
- if (!got_picture) {
+ if (!got_output) {
goto discard_packet;
}
subtitle_to_free = &subtitle;
diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c
index 9d2865d51e..c3a1fdfa03 100644
--- a/libavcodec/alacenc.c
+++ b/libavcodec/alacenc.c
@@ -146,7 +146,7 @@ static void calc_predictor_params(AlacEncodeContext *s, int ch)
s->min_prediction_order,
s->max_prediction_order,
ALAC_MAX_LPC_PRECISION, coefs, shift,
- AV_LPC_TYPE_LEVINSON, 0,
+ FF_LPC_TYPE_LEVINSON, 0,
ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);
s->lpc[ch].lpc_order = opt_order;
@@ -457,7 +457,7 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
s->avctx = avctx;
ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, s->max_prediction_order,
- AV_LPC_TYPE_LEVINSON);
+ FF_LPC_TYPE_LEVINSON);
return ret;
}
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 96e7030e9d..6ce3f4bf15 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -270,9 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 218d162687..0351412761 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -75,7 +75,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
ff_put_pixels_clamped = c->put_pixels_clamped;
ff_add_pixels_clamped = c->add_pixels_clamped;
@@ -97,7 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = ff_add_pixels_clamped_arm;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index fc0f7865f0..9acea4a1d6 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -72,7 +72,7 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
@@ -82,7 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
}
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 9e456f32df..6faf3dc8d0 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -173,7 +173,7 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (!avctx->lowres) {
if (avctx->idct_algo == FF_IDCT_AUTO ||
@@ -192,7 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
}
}
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->clear_block = ff_clear_block_neon;
c->clear_blocks = ff_clear_blocks_neon;
@@ -223,7 +223,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
if (CONFIG_H264_DECODER) {
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
index 6db1837ba0..85be83148a 100644
--- a/libavcodec/arm/dsputil_iwmmxt.c
+++ b/libavcodec/arm/dsputil_iwmmxt.c
@@ -155,7 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -168,7 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->clear_blocks = clear_blocks_iwmmxt;
c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index 5b11b7da88..b1d4f005e8 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon;
}
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth)
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth)
{
if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth);
}
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index acdb183f90..00e9dd5e2f 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -516,10 +516,11 @@ enum AVChromaLocation{
AVCHROMA_LOC_NB , ///< Not part of ABI
};
+#if FF_API_FLAC_GLOBAL_OPTS
/**
* LPC analysis type
*/
-enum AVLPCType {
+attribute_deprecated enum AVLPCType {
AV_LPC_TYPE_DEFAULT = -1, ///< use the codec default LPC type
AV_LPC_TYPE_NONE = 0, ///< do not use LPC prediction or use all zero coefficients
AV_LPC_TYPE_FIXED = 1, ///< fixed LPC coefficients
@@ -527,6 +528,7 @@ enum AVLPCType {
AV_LPC_TYPE_CHOLESKY = 3, ///< Cholesky factorization
AV_LPC_TYPE_NB , ///< Not part of ABI
};
+#endif
enum AVAudioServiceType {
AV_AUDIO_SERVICE_TYPE_MAIN = 0,
@@ -2514,42 +2516,53 @@ typedef struct AVCodecContext {
#define FF_COMPRESSION_DEFAULT -1
/**
- * LPC coefficient precision - used by FLAC encoder
* - encoding: Set by user.
* - decoding: unused
*/
- int lpc_coeff_precision;
+ int min_prediction_order;
/**
* - encoding: Set by user.
* - decoding: unused
*/
- int min_prediction_order;
+ int max_prediction_order;
+#if FF_API_FLAC_GLOBAL_OPTS
/**
+ * @defgroup flac_opts FLAC options
+ * @deprecated Use FLAC encoder private options instead.
+ * @{
+ */
+
+ /**
+ * LPC coefficient precision - used by FLAC encoder
* - encoding: Set by user.
* - decoding: unused
*/
- int max_prediction_order;
+ attribute_deprecated int lpc_coeff_precision;
/**
* search method for selecting prediction order
* - encoding: Set by user.
* - decoding: unused
*/
- int prediction_order_method;
+ attribute_deprecated int prediction_order_method;
/**
* - encoding: Set by user.
* - decoding: unused
*/
- int min_partition_order;
+ attribute_deprecated int min_partition_order;
/**
* - encoding: Set by user.
* - decoding: unused
*/
- int max_partition_order;
+ attribute_deprecated int max_partition_order;
+ /**
+ * @}
+ */
+#endif
/**
* GOP timecode frame start number, in non drop frame format
@@ -2767,19 +2780,21 @@ typedef struct AVCodecContext {
int log_level_offset;
+#if FF_API_FLAC_GLOBAL_OPTS
/**
* Determines which LPC analysis algorithm to use.
* - encoding: Set by user
* - decoding: unused
*/
- enum AVLPCType lpc_type;
+ attribute_deprecated enum AVLPCType lpc_type;
/**
* Number of passes to use for Cholesky factorization during LPC analysis
* - encoding: Set by user
* - decoding: unused
*/
- int lpc_passes;
+ attribute_deprecated int lpc_passes;
+#endif
/**
* Number of slices.
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index 01d7ec6a44..5b94472326 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -197,14 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
c->get_pixels = ff_bfin_get_pixels;
c->diff_pixels = ff_bfin_diff_pixels;
c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
- if (!h264_high_depth)
+ if (!high_bit_depth)
c->clear_blocks = bfin_clear_blocks;
c->pix_sum = ff_bfin_pix_sum;
c->pix_norm1 = ff_bfin_pix_norm1;
@@ -231,7 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
c->sse[1] = ff_bfin_sse8;
c->sse[2] = ff_bfin_sse4;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = bfin_put_pixels16;
c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 162458a7b5..0e596b1b01 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -43,15 +43,15 @@ uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };
#define BIT_DEPTH 9
-#include "dsputil_internal.h"
+#include "dsputil_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 10
-#include "dsputil_internal.h"
+#include "dsputil_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 8
-#include "dsputil_internal.h"
+#include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
new file mode 100644
index 0000000000..8ca6d3e414
--- /dev/null
+++ b/libavcodec/dsputil_template.c
@@ -0,0 +1,1391 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DSP utils
+ */
+
+#include "high_bit_depth.h"
+
+static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN2P(dst , AV_RN2P(src ));
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
+
+static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN4P(dst , AV_RN4P(src ));
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
+
+static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN4P(dst , AV_RN4P(src ));
+ AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
+
+static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN4P(dst , AV_RN4P(src ));
+ AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
+ AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
+ AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
+
+/* draw the edges of width 'w' of an image of size width, height */
+//FIXME check that this is ok for mpeg4 interlaced
+static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int sides)
+{
+ pixel *buf = (pixel*)_buf;
+ int wrap = _wrap / sizeof(pixel);
+ pixel *ptr, *last_line;
+ int i;
+
+ /* left and right */
+ ptr = buf;
+ for(i=0;i<height;i++) {
+#if BIT_DEPTH > 8
+ int j;
+ for (j = 0; j < w; j++) {
+ ptr[j-w] = ptr[0];
+ ptr[j+width] = ptr[width-1];
+ }
+#else
+ memset(ptr - w, ptr[0], w);
+ memset(ptr + width, ptr[width-1], w);
+#endif
+ ptr += wrap;
+ }
+
+ /* top and bottom + corners */
+ buf -= w;
+ last_line = buf + (height - 1) * wrap;
+ if (sides & EDGE_TOP)
+ for(i = 0; i < w; i++)
+ memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
+ if (sides & EDGE_BOTTOM)
+ for (i = 0; i < w; i++)
+ memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
+}
+
+/**
+ * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
+ int src_x, int src_y, int w, int h){
+ int x, y;
+ int start_y, start_x, end_y, end_x;
+
+ if(src_y>= h){
+ src+= (h-1-src_y)*linesize;
+ src_y=h-1;
+ }else if(src_y<=-block_h){
+ src+= (1-block_h-src_y)*linesize;
+ src_y=1-block_h;
+ }
+ if(src_x>= w){
+ src+= (w-1-src_x)*sizeof(pixel);
+ src_x=w-1;
+ }else if(src_x<=-block_w){
+ src+= (1-block_w-src_x)*sizeof(pixel);
+ src_x=1-block_w;
+ }
+
+ start_y= FFMAX(0, -src_y);
+ start_x= FFMAX(0, -src_x);
+ end_y= FFMIN(block_h, h-src_y);
+ end_x= FFMIN(block_w, w-src_x);
+ assert(start_y < end_y && block_h);
+ assert(start_x < end_x && block_w);
+
+ w = end_x - start_x;
+ src += start_y*linesize + start_x*sizeof(pixel);
+ buf += start_x*sizeof(pixel);
+
+ //top
+ for(y=0; y<start_y; y++){
+ memcpy(buf, src, w*sizeof(pixel));
+ buf += linesize;
+ }
+
+ // copy existing part
+ for(; y<end_y; y++){
+ memcpy(buf, src, w*sizeof(pixel));
+ src += linesize;
+ buf += linesize;
+ }
+
+ //bottom
+ src -= linesize;
+ for(; y<block_h; y++){
+ memcpy(buf, src, w*sizeof(pixel));
+ buf += linesize;
+ }
+
+ buf -= block_h * linesize + start_x*sizeof(pixel);
+ while (block_h--){
+ pixel *bufp = (pixel*)buf;
+ //left
+ for(x=0; x<start_x; x++){
+ bufp[x] = bufp[start_x];
+ }
+
+ //right
+ for(x=end_x; x<block_w; x++){
+ bufp[x] = bufp[end_x - 1];
+ }
+ buf += linesize;
+ }
+}
+
+static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
+{
+ int i;
+ pixel *restrict pixels = (pixel *restrict)_pixels;
+ dctcoef *block = (dctcoef*)_block;
+ line_size /= sizeof(pixel);
+
+ for(i=0;i<8;i++) {
+ pixels[0] += block[0];
+ pixels[1] += block[1];
+ pixels[2] += block[2];
+ pixels[3] += block[3];
+ pixels[4] += block[4];
+ pixels[5] += block[5];
+ pixels[6] += block[6];
+ pixels[7] += block[7];
+ pixels += line_size;
+ block += 8;
+ }
+}
+
+static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
+{
+ int i;
+ pixel *restrict pixels = (pixel *restrict)_pixels;
+ dctcoef *block = (dctcoef*)_block;
+ line_size /= sizeof(pixel);
+
+ for(i=0;i<4;i++) {
+ pixels[0] += block[0];
+ pixels[1] += block[1];
+ pixels[2] += block[2];
+ pixels[3] += block[3];
+ pixels += line_size;
+ block += 4;
+ }
+}
+
+#if 0
+
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ OP(*((uint64_t*)block), AV_RN64(pixels));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= AV_RN64(pixels );\
+ const uint64_t b= AV_RN64(pixels+1);\
+ OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= AV_RN64(pixels );\
+ const uint64_t b= AV_RN64(pixels+1);\
+ OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= AV_RN64(pixels );\
+ const uint64_t b= AV_RN64(pixels+line_size);\
+ OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ for(i=0; i<h; i++){\
+ const uint64_t a= AV_RN64(pixels );\
+ const uint64_t b= AV_RN64(pixels+line_size);\
+ OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ const uint64_t a= AV_RN64(pixels );\
+ const uint64_t b= AV_RN64(pixels+1);\
+ uint64_t l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0202020202020202ULL;\
+ uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ uint64_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint64_t a= AV_RN64(pixels );\
+ uint64_t b= AV_RN64(pixels+1);\
+ l1= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL);\
+ h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= AV_RN64(pixels );\
+ b= AV_RN64(pixels+1);\
+ l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0202020202020202ULL;\
+ h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ int i;\
+ const uint64_t a= AV_RN64(pixels );\
+ const uint64_t b= AV_RN64(pixels+1);\
+ uint64_t l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0101010101010101ULL;\
+ uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ uint64_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint64_t a= AV_RN64(pixels );\
+ uint64_t b= AV_RN64(pixels+1);\
+ l1= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL);\
+ h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= AV_RN64(pixels );\
+ b= AV_RN64(pixels+1);\
+ l0= (a&0x0303030303030303ULL)\
+ + (b&0x0303030303030303ULL)\
+ + 0x0101010101010101ULL;\
+ h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+ OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8*sizeof(pixel))\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8*sizeof(pixel))
+
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
+#else // 64 bit variant
+
+#define PIXOP2(OPNAME, OP) \
+static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ int i;\
+ for(i=0; i<h; i++){\
+ OP(*((pixel2*)(block )), AV_RN2P(pixels ));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ int i;\
+ for(i=0; i<h; i++){\
+ OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ int i;\
+ for(i=0; i<h; i++){\
+ OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
+ OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
+}\
+\
+static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+ int src_stride1, int src_stride2, int h){\
+ int i;\
+ for(i=0; i<h; i++){\
+ pixel4 a,b;\
+ a= AV_RN4P(&src1[i*src_stride1 ]);\
+ b= AV_RN4P(&src2[i*src_stride2 ]);\
+ OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\
+ a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
+ b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
+ OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
+ }\
+}\
+\
+static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+ int src_stride1, int src_stride2, int h){\
+ int i;\
+ for(i=0; i<h; i++){\
+ pixel4 a,b;\
+ a= AV_RN4P(&src1[i*src_stride1 ]);\
+ b= AV_RN4P(&src2[i*src_stride2 ]);\
+ OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
+ a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
+ b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
+ OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
+ }\
+}\
+\
+static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+ int src_stride1, int src_stride2, int h){\
+ int i;\
+ for(i=0; i<h; i++){\
+ pixel4 a,b;\
+ a= AV_RN4P(&src1[i*src_stride1 ]);\
+ b= AV_RN4P(&src2[i*src_stride2 ]);\
+ OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
+ }\
+}\
+\
+static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+ int src_stride1, int src_stride2, int h){\
+ int i;\
+ for(i=0; i<h; i++){\
+ pixel4 a,b;\
+ a= AV_RN2P(&src1[i*src_stride1 ]);\
+ b= AV_RN2P(&src2[i*src_stride2 ]);\
+ OP(*((pixel2*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
+ }\
+}\
+\
+static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+ int src_stride1, int src_stride2, int h){\
+ FUNC(OPNAME ## _pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
+ FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+ int src_stride1, int src_stride2, int h){\
+ FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
+ FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+ /* FIXME HIGH BIT DEPTH */\
+ int i;\
+ for(i=0; i<h; i++){\
+ uint32_t a, b, c, d, l0, l1, h0, h1;\
+ a= AV_RN32(&src1[i*src_stride1]);\
+ b= AV_RN32(&src2[i*src_stride2]);\
+ c= AV_RN32(&src3[i*src_stride3]);\
+ d= AV_RN32(&src4[i*src_stride4]);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ l1= (c&0x03030303UL)\
+ + (d&0x03030303UL);\
+ h1= ((c&0xFCFCFCFCUL)>>2)\
+ + ((d&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ a= AV_RN32(&src1[i*src_stride1+4]);\
+ b= AV_RN32(&src2[i*src_stride2+4]);\
+ c= AV_RN32(&src3[i*src_stride3+4]);\
+ d= AV_RN32(&src4[i*src_stride4+4]);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ l1= (c&0x03030303UL)\
+ + (d&0x03030303UL);\
+ h1= ((c&0xFCFCFCFCUL)>>2)\
+ + ((d&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ }\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+ FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+ /* FIXME HIGH BIT DEPTH*/\
+ int i;\
+ for(i=0; i<h; i++){\
+ uint32_t a, b, c, d, l0, l1, h0, h1;\
+ a= AV_RN32(&src1[i*src_stride1]);\
+ b= AV_RN32(&src2[i*src_stride2]);\
+ c= AV_RN32(&src3[i*src_stride3]);\
+ d= AV_RN32(&src4[i*src_stride4]);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x01010101UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ l1= (c&0x03030303UL)\
+ + (d&0x03030303UL);\
+ h1= ((c&0xFCFCFCFCUL)>>2)\
+ + ((d&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ a= AV_RN32(&src1[i*src_stride1+4]);\
+ b= AV_RN32(&src2[i*src_stride2+4]);\
+ c= AV_RN32(&src3[i*src_stride3+4]);\
+ d= AV_RN32(&src4[i*src_stride4+4]);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x01010101UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ l1= (c&0x03030303UL)\
+ + (d&0x03030303UL);\
+ h1= ((c&0xFCFCFCFCUL)>>2)\
+ + ((d&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ }\
+}\
+static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+ FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+ FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
+ int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+ FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+ FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
+{\
+ int i, a0, b0, a1, b1;\
+ pixel *block = (pixel*)_block;\
+ const pixel *pixels = (const pixel*)_pixels;\
+ line_size /= sizeof(pixel);\
+ a0= pixels[0];\
+ b0= pixels[1] + 2;\
+ a0 += b0;\
+ b0 += pixels[2];\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ a1= pixels[0];\
+ b1= pixels[1];\
+ a1 += b1;\
+ b1 += pixels[2];\
+\
+ block[0]= (a1+a0)>>2; /* FIXME non put */\
+ block[1]= (b1+b0)>>2;\
+\
+ pixels+=line_size;\
+ block +=line_size;\
+\
+ a0= pixels[0];\
+ b0= pixels[1] + 2;\
+ a0 += b0;\
+ b0 += pixels[2];\
+\
+ block[0]= (a1+a0)>>2;\
+ block[1]= (b1+b0)>>2;\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ /* FIXME HIGH BIT DEPTH */\
+ int i;\
+ const uint32_t a= AV_RN32(pixels );\
+ const uint32_t b= AV_RN32(pixels+1);\
+ uint32_t l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ uint32_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint32_t a= AV_RN32(pixels );\
+ uint32_t b= AV_RN32(pixels+1);\
+ l1= (a&0x03030303UL)\
+ + (b&0x03030303UL);\
+ h1= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= AV_RN32(pixels );\
+ b= AV_RN32(pixels+1);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+}\
+\
+static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ /* FIXME HIGH BIT DEPTH */\
+ int j;\
+ for(j=0; j<2; j++){\
+ int i;\
+ const uint32_t a= AV_RN32(pixels );\
+ const uint32_t b= AV_RN32(pixels+1);\
+ uint32_t l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ uint32_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint32_t a= AV_RN32(pixels );\
+ uint32_t b= AV_RN32(pixels+1);\
+ l1= (a&0x03030303UL)\
+ + (b&0x03030303UL);\
+ h1= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= AV_RN32(pixels );\
+ b= AV_RN32(pixels+1);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x02020202UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+ pixels+=4-line_size*(h+1);\
+ block +=4-line_size*h;\
+ }\
+}\
+\
+static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+ /* FIXME HIGH BIT DEPTH */\
+ int j;\
+ for(j=0; j<2; j++){\
+ int i;\
+ const uint32_t a= AV_RN32(pixels );\
+ const uint32_t b= AV_RN32(pixels+1);\
+ uint32_t l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x01010101UL;\
+ uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ uint32_t l1,h1;\
+\
+ pixels+=line_size;\
+ for(i=0; i<h; i+=2){\
+ uint32_t a= AV_RN32(pixels );\
+ uint32_t b= AV_RN32(pixels+1);\
+ l1= (a&0x03030303UL)\
+ + (b&0x03030303UL);\
+ h1= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ a= AV_RN32(pixels );\
+ b= AV_RN32(pixels+1);\
+ l0= (a&0x03030303UL)\
+ + (b&0x03030303UL)\
+ + 0x01010101UL;\
+ h0= ((a&0xFCFCFCFCUL)>>2)\
+ + ((b&0xFCFCFCFCUL)>>2);\
+ OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+ pixels+=line_size;\
+ block +=line_size;\
+ }\
+ pixels+=4-line_size*(h+1);\
+ block +=4-line_size*h;\
+ }\
+}\
+\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
+av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
+CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
+
+/* Store operators for the pixel-copy family: op_avg rounds-to-nearest
+ * over four packed pixels via rnd_avg_pixel4, op_put is a plain store.
+ * NOTE(review): the #endif below closes an #if that begins before this
+ * chunk -- presumably a bit-depth guard; confirm against the full file. */
+#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
+#endif
+#define op_put(a, b) a = b
+
+/* Instantiate the whole avg_/put_ pixel-copy function family. */
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+#undef op_avg
+#undef op_put
+
+/* For plain copies there is no rounding term, so the no-rnd variants
+ * alias the normal put functions. */
+#define put_no_rnd_pixels8_c put_pixels8_c
+#define put_no_rnd_pixels16_c put_pixels16_c
+
+/* Convenience wrapper: average two 16-wide sources (no-rnd) into dst,
+ * all three using the same stride. */
+static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+ FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h);
+}
+
+/* Same as above for 8-wide blocks. */
+static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+ FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
+}
+
+/**
+ * H.264 chroma motion compensation (widths 2/4/8) at 1/8-pel precision.
+ * A..D are the bilinear weights derived from the fractional offsets
+ * (x,y), each in [0,8); the four weights sum to 64.  When D==0 one of
+ * the fractional offsets is zero, so the 2-D filter degenerates to a
+ * 1-D filter along x (step==1) or y (step==stride), halving the
+ * multiplies per pixel.  OP is the store/average operator bound at
+ * instantiation time (it performs the +32 >> 6 normalization).
+ */
+#define H264_CHROMA_MC(OPNAME, OP)\
+static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ stride /= sizeof(pixel);\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{\
+ const int E= B+C;\
+ const int step= C ? stride : 1;\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0]));\
+ OP(dst[1], (A*src[1] + E*src[step+1]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}\
+\
+static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ stride /= sizeof(pixel);\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{\
+ const int E= B+C;\
+ const int step= C ? stride : 1;\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0]));\
+ OP(dst[1], (A*src[1] + E*src[step+1]));\
+ OP(dst[2], (A*src[2] + E*src[step+2]));\
+ OP(dst[3], (A*src[3] + E*src[step+3]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}\
+\
+static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ stride /= sizeof(pixel);\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+ OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
+ OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
+ OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
+ OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{\
+ const int E= B+C;\
+ const int step= C ? stride : 1;\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0]));\
+ OP(dst[1], (A*src[1] + E*src[step+1]));\
+ OP(dst[2], (A*src[2] + E*src[step+2]));\
+ OP(dst[3], (A*src[3] + E*src[step+3]));\
+ OP(dst[4], (A*src[4] + E*src[step+4]));\
+ OP(dst[5], (A*src[5] + E*src[step+5]));\
+ OP(dst[6], (A*src[6] + E*src[step+6]));\
+ OP(dst[7], (A*src[7] + E*src[step+7]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}
+
+/* Chroma MC store operators: the bilinear weights sum to 64, hence the
+ * +32 rounding bias and >>6 normalization; op_avg additionally rounds
+ * the average with the existing destination pixel. */
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+
+H264_CHROMA_MC(put_ , op_put)
+H264_CHROMA_MC(avg_ , op_avg)
+#undef op_avg
+#undef op_put
+
+/**
+ * H.264 luma half-pel 6-tap lowpass filters for widths 2/4/8, plus the
+ * 16-wide variants built from four 8x8 calls.  The (1,-5,20,20,-5,1)
+ * kernel is applied horizontally (_h_lowpass), vertically (_v_lowpass)
+ * or separably in both directions (_hv_lowpass: horizontal pass into
+ * the int16_t tmp[] buffer, then vertical pass over tmp).
+ * 'pad' biases the tmp[] intermediates so they fit in int16_t when
+ * BIT_DEPTH > 9; it is subtracted again before the second pass.
+ * OP normalizes single-pass results by (+16)>>5, OP2 normalizes the
+ * two-pass results by (+512)>>10; both are bound at instantiation.
+ */
+#define H264_LOWPASS(OPNAME, OP, OP2) \
+static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
+ const int h=2;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ for(i=0; i<h; i++)\
+ {\
+ OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+ OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+ dst+=dstStride;\
+ src+=srcStride;\
+ }\
+}\
+\
+static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
+ const int w=2;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ for(i=0; i<w; i++)\
+ {\
+ const int srcB= src[-2*srcStride];\
+ const int srcA= src[-1*srcStride];\
+ const int src0= src[0 *srcStride];\
+ const int src1= src[1 *srcStride];\
+ const int src2= src[2 *srcStride];\
+ const int src3= src[3 *srcStride];\
+ const int src4= src[4 *srcStride];\
+ OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+ OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+ dst++;\
+ src++;\
+ }\
+}\
+\
+static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+ const int h=2;\
+ const int w=2;\
+ const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ src -= 2*srcStride;\
+ for(i=0; i<h+5; i++)\
+ {\
+ tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
+ tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
+ tmp+=tmpStride;\
+ src+=srcStride;\
+ }\
+ tmp -= tmpStride*(h+5-2);\
+ for(i=0; i<w; i++)\
+ {\
+ const int tmpB= tmp[-2*tmpStride] - pad;\
+ const int tmpA= tmp[-1*tmpStride] - pad;\
+ const int tmp0= tmp[0 *tmpStride] - pad;\
+ const int tmp1= tmp[1 *tmpStride] - pad;\
+ const int tmp2= tmp[2 *tmpStride] - pad;\
+ const int tmp3= tmp[3 *tmpStride] - pad;\
+ const int tmp4= tmp[4 *tmpStride] - pad;\
+ OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+ OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+ dst++;\
+ tmp++;\
+ }\
+}\
+static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
+ const int h=4;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ for(i=0; i<h; i++)\
+ {\
+ OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+ OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+ OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
+ OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
+ dst+=dstStride;\
+ src+=srcStride;\
+ }\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
+ const int w=4;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ for(i=0; i<w; i++)\
+ {\
+ const int srcB= src[-2*srcStride];\
+ const int srcA= src[-1*srcStride];\
+ const int src0= src[0 *srcStride];\
+ const int src1= src[1 *srcStride];\
+ const int src2= src[2 *srcStride];\
+ const int src3= src[3 *srcStride];\
+ const int src4= src[4 *srcStride];\
+ const int src5= src[5 *srcStride];\
+ const int src6= src[6 *srcStride];\
+ OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+ OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+ OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+ OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+ dst++;\
+ src++;\
+ }\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+ const int h=4;\
+ const int w=4;\
+ const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ src -= 2*srcStride;\
+ for(i=0; i<h+5; i++)\
+ {\
+ tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
+ tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
+ tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
+ tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
+ tmp+=tmpStride;\
+ src+=srcStride;\
+ }\
+ tmp -= tmpStride*(h+5-2);\
+ for(i=0; i<w; i++)\
+ {\
+ const int tmpB= tmp[-2*tmpStride] - pad;\
+ const int tmpA= tmp[-1*tmpStride] - pad;\
+ const int tmp0= tmp[0 *tmpStride] - pad;\
+ const int tmp1= tmp[1 *tmpStride] - pad;\
+ const int tmp2= tmp[2 *tmpStride] - pad;\
+ const int tmp3= tmp[3 *tmpStride] - pad;\
+ const int tmp4= tmp[4 *tmpStride] - pad;\
+ const int tmp5= tmp[5 *tmpStride] - pad;\
+ const int tmp6= tmp[6 *tmpStride] - pad;\
+ OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+ OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+ OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+ OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+ dst++;\
+ tmp++;\
+ }\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
+ const int h=8;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ for(i=0; i<h; i++)\
+ {\
+ OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
+ OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
+ OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
+ OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
+ OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
+ OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
+ OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
+ OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
+ dst+=dstStride;\
+ src+=srcStride;\
+ }\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
+ const int w=8;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ for(i=0; i<w; i++)\
+ {\
+ const int srcB= src[-2*srcStride];\
+ const int srcA= src[-1*srcStride];\
+ const int src0= src[0 *srcStride];\
+ const int src1= src[1 *srcStride];\
+ const int src2= src[2 *srcStride];\
+ const int src3= src[3 *srcStride];\
+ const int src4= src[4 *srcStride];\
+ const int src5= src[5 *srcStride];\
+ const int src6= src[6 *srcStride];\
+ const int src7= src[7 *srcStride];\
+ const int src8= src[8 *srcStride];\
+ const int src9= src[9 *srcStride];\
+ const int src10=src[10*srcStride];\
+ OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+ OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+ OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+ OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+ OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+ OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+ OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+ OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+ dst++;\
+ src++;\
+ }\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+ const int h=8;\
+ const int w=8;\
+ const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+ INIT_CLIP\
+ int i;\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ dstStride /= sizeof(pixel);\
+ srcStride /= sizeof(pixel);\
+ src -= 2*srcStride;\
+ for(i=0; i<h+5; i++)\
+ {\
+ tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
+ tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
+ tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
+ tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
+ tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
+ tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
+ tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
+ tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
+ tmp+=tmpStride;\
+ src+=srcStride;\
+ }\
+ tmp -= tmpStride*(h+5-2);\
+ for(i=0; i<w; i++)\
+ {\
+ const int tmpB= tmp[-2*tmpStride] - pad;\
+ const int tmpA= tmp[-1*tmpStride] - pad;\
+ const int tmp0= tmp[0 *tmpStride] - pad;\
+ const int tmp1= tmp[1 *tmpStride] - pad;\
+ const int tmp2= tmp[2 *tmpStride] - pad;\
+ const int tmp3= tmp[3 *tmpStride] - pad;\
+ const int tmp4= tmp[4 *tmpStride] - pad;\
+ const int tmp5= tmp[5 *tmpStride] - pad;\
+ const int tmp6= tmp[6 *tmpStride] - pad;\
+ const int tmp7= tmp[7 *tmpStride] - pad;\
+ const int tmp8= tmp[8 *tmpStride] - pad;\
+ const int tmp9= tmp[9 *tmpStride] - pad;\
+ const int tmp10=tmp[10*tmpStride] - pad;\
+ OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+ OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+ OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+ OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+ OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
+ OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
+ OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
+ OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
+ dst++;\
+ tmp++;\
+ }\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
+ FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
+ FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
+ FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
+ FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
+}\
+\
+static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
+ FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
+ FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
+}\
+
+/**
+ * H.264 luma quarter-pel motion compensation for one block SIZE.
+ * _mcXY names encode the quarter-pel offset (X = horizontal, Y =
+ * vertical, each 0..3).  Half-pel positions come straight from the
+ * 6-tap lowpass filters; quarter-pel positions average (pixelsN_l2)
+ * two neighbouring half/full-pel planes computed into stack buffers.
+ * copy_blockN stages SIZE+5 source rows into full[] so the vertical
+ * filter can read the 2 rows above and 3 below the block.
+ * Buffer sizes are scaled by sizeof(pixel) for the templated depth.
+ */
+#define H264_MC(OPNAME, SIZE) \
+static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
+ FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
+ FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+ FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+ FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+ FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+ FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
+ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+ FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
+ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
+ FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
+ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
+ FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
+ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+ uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
+ uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
+ FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
+ FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
+ FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
+ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
+}\
+
+/* qpel store operators: op/op2 clamp via CLIP after normalizing the
+ * single-pass (+16 >> 5) and two-pass (+512 >> 10) filter outputs;
+ * the _avg variants additionally round-average with the destination. */
+#define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
+//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
+#define op_put(a, b) a = CLIP(((b) + 16)>>5)
+#define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
+#define op2_put(a, b) a = CLIP(((b) + 512)>>10)
+
+/* Instantiate all qpel filter/MC functions.  Note there is
+ * deliberately no H264_MC(avg_, 2): 2-wide averaging MC is unused. */
+H264_LOWPASS(put_ , op_put, op2_put)
+H264_LOWPASS(avg_ , op_avg, op2_avg)
+H264_MC(put_, 2)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+
+/* The _mc00 (integer-pel) cases are plain copies, so alias them to the
+ * bit-depth-suffixed NxN copy wrappers defined below instead of
+ * generating separate functions. */
+#if BIT_DEPTH == 8
+# define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
+# define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
+# define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
+# define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
+#elif BIT_DEPTH == 9
+# define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
+# define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
+# define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
+# define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
+#elif BIT_DEPTH == 10
+# define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
+# define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
+# define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
+# define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
+#endif
+
+/* Exported fixed-size copy/average wrappers: bind the height argument
+ * of the generic pixelsN functions to produce NxN block operations. */
+void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+ FUNCC(put_pixels8)(dst, src, stride, 8);
+}
+void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+ FUNCC(avg_pixels8)(dst, src, stride, 8);
+}
+void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+ FUNCC(put_pixels16)(dst, src, stride, 16);
+}
+void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+ FUNCC(avg_pixels16)(dst, src, stride, 16);
+}
+
+/* Zero one 64-coefficient DCT block; sizeof(dctcoef) makes the cleared
+ * size track the template's bit depth. */
+static void FUNCC(clear_block)(DCTELEM *block)
+{
+ memset(block, 0, sizeof(dctcoef)*64);
+}
+
+/**
+ * Zero six consecutive 64-coefficient DCT blocks.  Uses
+ * sizeof(dctcoef) (not DCTELEM) so the cleared size matches the
+ * templated bit depth's coefficient type.
+ */
+static void FUNCC(clear_blocks)(DCTELEM *blocks)
+{
+ memset(blocks, 0, sizeof(dctcoef)*6*64);
+}
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 637d09dba5..811a75a06f 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -21,6 +21,7 @@
#include "libavutil/crc.h"
#include "libavutil/md5.h"
+#include "libavutil/opt.h"
#include "avcodec.h"
#include "get_bits.h"
#include "golomb.h"
@@ -43,7 +44,7 @@
typedef struct CompressionOptions {
int compression_level;
int block_time_ms;
- enum AVLPCType lpc_type;
+ enum FFLPCType lpc_type;
int lpc_passes;
int lpc_coeff_precision;
int min_prediction_order;
@@ -80,6 +81,7 @@ typedef struct FlacFrame {
} FlacFrame;
typedef struct FlacEncodeContext {
+ AVClass *class;
PutBitContext pb;
int channels;
int samplerate;
@@ -156,16 +158,16 @@ static av_cold void dprint_compression_options(FlacEncodeContext *s)
av_log(avctx, AV_LOG_DEBUG, " compression: %d\n", opt->compression_level);
switch (opt->lpc_type) {
- case AV_LPC_TYPE_NONE:
+ case FF_LPC_TYPE_NONE:
av_log(avctx, AV_LOG_DEBUG, " lpc type: None\n");
break;
- case AV_LPC_TYPE_FIXED:
+ case FF_LPC_TYPE_FIXED:
av_log(avctx, AV_LOG_DEBUG, " lpc type: Fixed pre-defined coefficients\n");
break;
- case AV_LPC_TYPE_LEVINSON:
+ case FF_LPC_TYPE_LEVINSON:
av_log(avctx, AV_LOG_DEBUG, " lpc type: Levinson-Durbin recursion with Welch window\n");
break;
- case AV_LPC_TYPE_CHOLESKY:
+ case FF_LPC_TYPE_CHOLESKY:
av_log(avctx, AV_LOG_DEBUG, " lpc type: Cholesky factorization, %d pass%s\n",
opt->lpc_passes, opt->lpc_passes == 1 ? "" : "es");
break;
@@ -266,32 +268,42 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
s->options.block_time_ms = ((int[]){ 27, 27, 27,105,105,105,105,105,105,105,105,105,105})[level];
- s->options.lpc_type = ((int[]){ AV_LPC_TYPE_FIXED, AV_LPC_TYPE_FIXED, AV_LPC_TYPE_FIXED,
- AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON,
- AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON,
- AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON, AV_LPC_TYPE_LEVINSON,
- AV_LPC_TYPE_LEVINSON})[level];
+ if (s->options.lpc_type == FF_LPC_TYPE_DEFAULT)
+ s->options.lpc_type = ((int[]){ FF_LPC_TYPE_FIXED, FF_LPC_TYPE_FIXED, FF_LPC_TYPE_FIXED,
+ FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
+ FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
+ FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
+ FF_LPC_TYPE_LEVINSON})[level];
s->options.min_prediction_order = ((int[]){ 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})[level];
s->options.max_prediction_order = ((int[]){ 3, 4, 4, 6, 8, 8, 8, 8, 12, 12, 12, 32, 32})[level];
- s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST,
- ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST,
- ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG, ORDER_METHOD_4LEVEL,
- ORDER_METHOD_LOG, ORDER_METHOD_SEARCH, ORDER_METHOD_LOG,
- ORDER_METHOD_SEARCH})[level];
+ if (s->options.prediction_order_method < 0)
+ s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST,
+ ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST,
+ ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG, ORDER_METHOD_4LEVEL,
+ ORDER_METHOD_LOG, ORDER_METHOD_SEARCH, ORDER_METHOD_LOG,
+ ORDER_METHOD_SEARCH})[level];
- s->options.min_partition_order = ((int[]){ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})[level];
- s->options.max_partition_order = ((int[]){ 2, 2, 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 8})[level];
+ if (s->options.min_partition_order > s->options.max_partition_order) {
+ av_log(avctx, AV_LOG_ERROR, "invalid partition orders: min=%d max=%d\n",
+ s->options.min_partition_order, s->options.max_partition_order);
+ return AVERROR(EINVAL);
+ }
+ if (s->options.min_partition_order < 0)
+ s->options.min_partition_order = ((int[]){ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})[level];
+ if (s->options.max_partition_order < 0)
+ s->options.max_partition_order = ((int[]){ 2, 2, 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 8})[level];
/* set compression option overrides from AVCodecContext */
- if (avctx->lpc_type > AV_LPC_TYPE_DEFAULT) {
- if (avctx->lpc_type > AV_LPC_TYPE_CHOLESKY) {
+#if FF_API_FLAC_GLOBAL_OPTS
+ if (avctx->lpc_type > FF_LPC_TYPE_DEFAULT) {
+ if (avctx->lpc_type > FF_LPC_TYPE_CHOLESKY) {
av_log(avctx, AV_LOG_ERROR, "unknown lpc type: %d\n", avctx->lpc_type);
return -1;
}
s->options.lpc_type = avctx->lpc_type;
- if (s->options.lpc_type == AV_LPC_TYPE_CHOLESKY) {
+ if (s->options.lpc_type == FF_LPC_TYPE_CHOLESKY) {
if (avctx->lpc_passes < 0) {
// default number of passes for Cholesky
s->options.lpc_passes = 2;
@@ -304,11 +316,12 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
}
}
}
+#endif
- if (s->options.lpc_type == AV_LPC_TYPE_NONE) {
+ if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
s->options.min_prediction_order = 0;
} else if (avctx->min_prediction_order >= 0) {
- if (s->options.lpc_type == AV_LPC_TYPE_FIXED) {
+ if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
if (avctx->min_prediction_order > MAX_FIXED_ORDER) {
av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
avctx->min_prediction_order);
@@ -322,10 +335,10 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
}
s->options.min_prediction_order = avctx->min_prediction_order;
}
- if (s->options.lpc_type == AV_LPC_TYPE_NONE) {
+ if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
s->options.max_prediction_order = 0;
} else if (avctx->max_prediction_order >= 0) {
- if (s->options.lpc_type == AV_LPC_TYPE_FIXED) {
+ if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
if (avctx->max_prediction_order > MAX_FIXED_ORDER) {
av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
avctx->max_prediction_order);
@@ -345,6 +358,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
return -1;
}
+#if FF_API_FLAC_GLOBAL_OPTS
if (avctx->prediction_order_method >= 0) {
if (avctx->prediction_order_method > ORDER_METHOD_LOG) {
av_log(avctx, AV_LOG_ERROR, "invalid prediction order method: %d\n",
@@ -375,6 +389,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
s->options.min_partition_order, s->options.max_partition_order);
return -1;
}
+#endif
if (avctx->frame_size > 0) {
if (avctx->frame_size < FLAC_MIN_BLOCKSIZE ||
@@ -388,6 +403,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
}
s->max_blocksize = s->avctx->frame_size;
+#if FF_API_FLAC_GLOBAL_OPTS
/* set LPC precision */
if (avctx->lpc_coeff_precision > 0) {
if (avctx->lpc_coeff_precision > MAX_LPC_PRECISION) {
@@ -396,10 +412,8 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
return -1;
}
s->options.lpc_coeff_precision = avctx->lpc_coeff_precision;
- } else {
- /* default LPC precision */
- s->options.lpc_coeff_precision = 15;
}
+#endif
/* set maximum encoded frame size in verbatim mode */
s->max_framesize = ff_flac_get_max_frame_size(s->avctx->frame_size,
@@ -448,7 +462,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
}
ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
- s->options.max_prediction_order, AV_LPC_TYPE_LEVINSON);
+ s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
dprint_compression_options(s);
@@ -889,8 +903,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
/* FIXED */
sub->type = FLAC_SUBFRAME_FIXED;
- if (s->options.lpc_type == AV_LPC_TYPE_NONE ||
- s->options.lpc_type == AV_LPC_TYPE_FIXED || n <= max_order) {
+ if (s->options.lpc_type == FF_LPC_TYPE_NONE ||
+ s->options.lpc_type == FF_LPC_TYPE_FIXED || n <= max_order) {
uint32_t bits[MAX_FIXED_ORDER+1];
if (max_order > MAX_FIXED_ORDER)
max_order = MAX_FIXED_ORDER;
@@ -1336,6 +1350,33 @@ static av_cold int flac_encode_close(AVCodecContext *avctx)
return 0;
}
+#define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
+static const AVOption options[] = {
+{ "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), FF_OPT_TYPE_INT, 15, 0, MAX_LPC_PRECISION, FLAGS },
+{ "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), FF_OPT_TYPE_INT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, "lpc_type" },
+{ "none", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_NONE, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "fixed", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_FIXED, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "levinson", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_LEVINSON, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "cholesky", NULL, 0, FF_OPT_TYPE_CONST, FF_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
+{ "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, FLAGS },
+{ "min_partition_order", NULL, offsetof(FlacEncodeContext, options.min_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS },
+{ "max_partition_order", NULL, offsetof(FlacEncodeContext, options.max_partition_order), FF_OPT_TYPE_INT, -1, -1, MAX_PARTITION_ORDER, FLAGS },
+{ "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), FF_OPT_TYPE_INT, -1, -1, ORDER_METHOD_LOG, FLAGS, "predm" },
+{ "estimation", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_EST, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "2level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_2LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "4level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_4LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "8level", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_8LEVEL, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "search", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_SEARCH, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ "log", NULL, 0, FF_OPT_TYPE_CONST, ORDER_METHOD_LOG, INT_MIN, INT_MAX, FLAGS, "predm" },
+{ NULL },
+};
+
+static const AVClass flac_encoder_class = {
+ "FLAC encoder",
+ av_default_item_name,
+ options,
+ LIBAVUTIL_VERSION_INT,
+};
AVCodec ff_flac_encoder = {
"flac",
@@ -1349,4 +1390,5 @@ AVCodec ff_flac_encoder = {
.capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
.sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
.long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"),
+ .priv_class = &flac_encoder_class,
};
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 353a0b343b..a843d21446 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -45,11 +45,11 @@
//#undef NDEBUG
#include <assert.h>
-static const uint8_t rem6[QP_MAX_MAX+1]={
+static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
-static const uint8_t div6[QP_MAX_MAX+1]={
+static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
@@ -586,6 +586,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
h->pixel_shift = 0;
+ h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
h->thread_context[0] = h;
h->outputed_poc = h->next_outputed_poc = INT_MIN;
@@ -733,6 +734,7 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex
int ff_h264_frame_start(H264Context *h){
MpegEncContext * const s = &h->s;
int i;
+ const int pixel_shift = h->pixel_shift;
if(MPV_frame_start(s, s->avctx) < 0)
return -1;
@@ -749,14 +751,14 @@ int ff_h264_frame_start(H264Context *h){
assert(s->linesize && s->uvlinesize);
for(i=0; i<16; i++){
- h->block_offset[i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
- h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
+ h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
+ h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
}
for(i=0; i<4; i++){
h->block_offset[16+i]=
- h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+ h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
h->block_offset[24+16+i]=
- h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+ h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
}
/* can't be in alloc_tables because linesize isn't known there.
@@ -948,6 +950,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
MpegEncContext * const s = &h->s;
uint8_t *top_border;
int top_idx = 1;
+ const int pixel_shift = h->pixel_shift;
src_y -= linesize;
src_cb -= uvlinesize;
@@ -958,10 +961,10 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
if(!MB_MBAFF){
top_border = h->top_borders[0][s->mb_x];
AV_COPY128(top_border, src_y + 15*linesize);
- if (h->pixel_shift)
+ if (pixel_shift)
AV_COPY128(top_border+16, src_y+15*linesize+16);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
- if (h->pixel_shift) {
+ if (pixel_shift) {
AV_COPY128(top_border+32, src_cb+7*uvlinesize);
AV_COPY128(top_border+48, src_cr+7*uvlinesize);
} else {
@@ -980,11 +983,11 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
// There are two lines saved, the line above the the top macroblock of a pair,
// and the line above the bottom macroblock
AV_COPY128(top_border, src_y + 16*linesize);
- if (h->pixel_shift)
+ if (pixel_shift)
AV_COPY128(top_border+16, src_y+16*linesize+16);
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
- if (h->pixel_shift) {
+ if (pixel_shift) {
AV_COPY128(top_border+32, src_cb+8*uvlinesize);
AV_COPY128(top_border+48, src_cr+8*uvlinesize);
} else {
@@ -994,7 +997,10 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
}
}
-static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple, int pixel_shift){
+static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
+ uint8_t *src_cb, uint8_t *src_cr,
+ int linesize, int uvlinesize,
+ int xchg, int simple, int pixel_shift){
MpegEncContext * const s = &h->s;
int deblock_left;
int deblock_top;
@@ -1040,38 +1046,38 @@ else AV_COPY64(b,a);
if(deblock_top){
if(deblock_left){
- XCHG(top_border_m1+(8<<pixel_shift), src_y -(7<<h->pixel_shift), 1);
+ XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
}
- XCHG(top_border+(0<<pixel_shift), src_y +(1<<pixel_shift), xchg);
- XCHG(top_border+(8<<pixel_shift), src_y +(9<<pixel_shift), 1);
+ XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
+ XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
if(s->mb_x+1 < s->mb_width){
- XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<pixel_shift), 1);
+ XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
}
}
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_top){
if(deblock_left){
- XCHG(top_border_m1+(16<<pixel_shift), src_cb -(7<<pixel_shift), 1);
- XCHG(top_border_m1+(24<<pixel_shift), src_cr -(7<<pixel_shift), 1);
+ XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
+ XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
}
- XCHG(top_border+(16<<pixel_shift), src_cb+1+pixel_shift, 1);
- XCHG(top_border+(24<<pixel_shift), src_cr+1+pixel_shift, 1);
+ XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
+ XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
}
}
}
-static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index, int pixel_shift) {
- if (!pixel_shift)
- return mb[index];
- else
- return ((int32_t*)mb)[index];
+static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
+ if (high_bit_depth) {
+ return AV_RN32A(((int32_t*)mb) + index);
+ } else
+ return AV_RN16A(mb + index);
}
-static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value, int pixel_shift) {
- if (!pixel_shift)
- mb[index] = value;
- else
- ((int32_t*)mb)[index] = value;
+static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
+ if (high_bit_depth) {
+ AV_WN32A(((int32_t*)mb) + index, value);
+ } else
+ AV_WN16A(mb + index, value);
}
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
@@ -1090,12 +1096,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
- dest_y = s->current_picture.data[0] + ((mb_x<<pixel_shift) + mb_y * s->linesize ) * 16;
- dest_cb = s->current_picture.data[1] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8;
- dest_cr = s->current_picture.data[2] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8;
+ dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
+ dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
+ dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
- s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<pixel_shift), s->linesize, 4);
- s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<pixel_shift), dest_cr - dest_cb, 2);
+ s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
+ s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
h->list_counts[mb_xy]= h->list_count;
@@ -1186,16 +1192,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
uint8_t * const ptr= dest_y + block_offset[i];
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
- h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize);
+ h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
}else{
const int nnz = h->non_zero_count_cache[ scan8[i] ];
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
(h->topright_samples_available<<i)&0x4000, linesize);
if(nnz){
- if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
- idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize);
+ if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
+ idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
else
- idct_add (ptr, h->mb + (i*16<<pixel_shift), linesize);
+ idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize);
}
}
}
@@ -1212,7 +1218,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
- h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize);
+ h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize);
}else{
uint8_t *topright;
int nnz, tr;
@@ -1229,7 +1235,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
topright= (uint8_t*) &tr;
}
}else
- topright= ptr + (4<<pixel_shift) - linesize;
+ topright= ptr + (4 << pixel_shift) - linesize;
}else
topright= NULL;
@@ -1237,8 +1243,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
nnz = h->non_zero_count_cache[ scan8[i] ];
if(nnz){
if(is_h264){
- if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
- idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize);
+ if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16))
+ idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize);
else
idct_add (ptr, h->mb + (i*16<<pixel_shift), linesize);
}
@@ -1261,7 +1267,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
for(i = 0; i < 16; i++)
- dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i,pixel_shift),pixel_shift);
+ dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i));
}
}
}
@@ -1288,8 +1294,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
}else{
for(i=0; i<16; i++){
- if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
- s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize);
+ if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+ s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
}
}
}else{
@@ -1301,7 +1307,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
for(i=0; i<16; i+=di){
if(h->non_zero_count_cache[ scan8[i] ]){
- idct_add(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize);
+ idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize);
}
}
}else{
@@ -1329,21 +1335,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
uint8_t *dest[2] = {dest_cb, dest_cr};
if(transform_bypass){
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
- h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<pixel_shift), uvlinesize);
- h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<pixel_shift), uvlinesize);
+ h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize);
+ h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize);
}else{
idct_add = s->dsp.add_pixels4;
for(i=16; i<16+8; i++){
- if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
- idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<pixel_shift), uvlinesize);
+ if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+ idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
}
}
}else{
if(is_h264){
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+ h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+ h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
h->h264dsp.h264_idct_add8(dest, block_offset,
h->mb, uvlinesize,
h->non_zero_count_cache);
@@ -1370,9 +1376,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
/**
* Process a macroblock; this case avoids checks for expensive uncommon cases.
*/
-static void hl_decode_mb_simple8(H264Context *h){
- hl_decode_mb_internal(h, 1, 0);
+#define hl_decode_mb_simple(sh, bits) \
+static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
+ hl_decode_mb_internal(h, 1, sh); \
}
+hl_decode_mb_simple(0, 8);
+hl_decode_mb_simple(1, 16);
/**
* Process a macroblock; this handles edge cases, such as interlacing.
@@ -1387,11 +1396,12 @@ void ff_h264_hl_decode_mb(H264Context *h){
const int mb_type= s->current_picture.mb_type[mb_xy];
int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
- if (is_complex || h->pixel_shift)
+ if (is_complex) {
hl_decode_mb_complex(h);
- else{
- hl_decode_mb_simple8(h);
- }
+ } else if (h->pixel_shift) {
+ hl_decode_mb_simple_16(h);
+ } else
+ hl_decode_mb_simple_8(h);
}
static int pred_weight_table(H264Context *h){
@@ -2557,6 +2567,7 @@ static void loop_filter(H264Context *h){
const int end_mb_y= s->mb_y + FRAME_MBAFF;
const int old_slice_type= h->slice_type;
const int end_mb_x = s->mb_x;
+ const int pixel_shift = h->pixel_shift;
if(h->deblocking_filter) {
int start_x= s->resync_mb_y == s->mb_y ? s->resync_mb_x : 0;
@@ -2573,9 +2584,9 @@ static void loop_filter(H264Context *h){
s->mb_x= mb_x;
s->mb_y= mb_y;
- dest_y = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize ) * 16;
- dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
- dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
+ dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
+ dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
+ dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
//FIXME simplify above
if (MB_FIELD) {
@@ -3060,7 +3071,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
- h->pixel_shift = h->sps.bit_depth_luma/9;
+ h->pixel_shift = h->sps.bit_depth_luma > 8;
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index a1db628bdc..04da701750 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -108,7 +108,7 @@
*/
#define DELAYED_PIC_REF 4
-#define QP_MAX_MAX (51 + 2*6) // The maximum supported qp
+#define QP_MAX_NUM (51 + 2*6) // The maximum supported qp
/* NAL unit types */
enum {
@@ -266,7 +266,7 @@ typedef struct MMCO{
typedef struct H264Context{
MpegEncContext s;
H264DSPContext h264dsp;
- int pixel_shift;
+ int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264
int chroma_qp[2]; //QPc
int qp_thresh; ///< QP threshold to skip loopfilter
@@ -355,8 +355,8 @@ typedef struct H264Context{
*/
PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
- uint32_t dequant4_buffer[6][QP_MAX_MAX+1][16]; //FIXME should these be moved down?
- uint32_t dequant8_buffer[2][QP_MAX_MAX+1][64];
+ uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down?
+ uint32_t dequant8_buffer[2][QP_MAX_NUM+1][64];
uint32_t (*dequant4_coeff[6])[16];
uint32_t (*dequant8_coeff[2])[64];
@@ -597,7 +597,7 @@ typedef struct H264Context{
}H264Context;
-extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_MAX+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10).
+extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10).
/**
* Decode SEI
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 6017afc7aa..925ac44498 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1103,10 +1103,11 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
#define STORE_BLOCK(type) \
- do {\
- uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;\
-\
- int j= scantable[index[--coeff_count]];\
+ do { \
+ uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; \
+ \
+ int j= scantable[index[--coeff_count]]; \
+ \
if( get_cabac( CC, ctx ) == 0 ) { \
node_ctx = coeff_abs_level_transition[0][node_ctx]; \
if( is_dc ) { \
@@ -1141,8 +1142,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
}else{ \
((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \
} \
- }\
- } while( coeff_count );
+ } \
+ } while ( coeff_count );
if (h->pixel_shift) {
STORE_BLOCK(int32_t)
@@ -1204,6 +1205,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
int mb_xy;
int mb_type, partition_count, cbp = 0;
int dct8x8_allowed= h->pps.transform_8x8_mode;
+ const int pixel_shift = h->pixel_shift;
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -1312,7 +1314,7 @@ decode_intra_mb:
h->slice_table[ mb_xy ]= h->slice_num;
if(IS_INTRA_PCM(mb_type)) {
- const int mb_size = 384*h->sps.bit_depth_luma/8;
+ const int mb_size = (384*h->sps.bit_depth_luma) >> 3;
const uint8_t *ptr;
// We assume these blocks are very rare so we do not optimize it.
@@ -1670,7 +1672,7 @@ decode_intra_mb:
qmul = h->dequant4_coeff[0][s->qscale];
for( i = 0; i < 16; i++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
- decode_cabac_residual_nondc(h, h->mb + (16*i<<h->pixel_shift), 1, i, scan + 1, qmul, 15);
+ decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15);
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
@@ -1680,7 +1682,7 @@ decode_intra_mb:
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
if( cbp & (1<<i8x8) ) {
if( IS_8x8DCT(mb_type) ) {
- decode_cabac_residual_nondc(h, h->mb + (64*i8x8<<h->pixel_shift), 5, 4*i8x8,
+ decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8,
scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
} else {
qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
@@ -1688,7 +1690,7 @@ decode_intra_mb:
const int index = 4*i8x8 + i4x4;
//av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER
- decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 2, index, scan, qmul, 16);
+ decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual")
}
}
@@ -1703,7 +1705,7 @@ decode_intra_mb:
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
- decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c)<<h->pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
+ decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
@@ -1714,7 +1716,7 @@ decode_intra_mb:
for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
- decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 4, index, scan + 1, qmul, 15);
+ decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
}
}
} else {
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 61b7436739..62e30f1311 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -542,6 +542,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
int partition_count;
unsigned int mb_type, cbp;
int dct8x8_allowed= h->pps.transform_8x8_mode;
+ const int pixel_shift = h->pixel_shift;
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -961,7 +962,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){
for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8;
- if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index<<h->pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
+ if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
return -1;
}
}
@@ -973,7 +974,7 @@ decode_intra_mb:
for(i8x8=0; i8x8<4; i8x8++){
if(cbp & (1<<i8x8)){
if(IS_8x8DCT(mb_type)){
- DCTELEM *buf = &h->mb[64*i8x8<<h->pixel_shift];
+ DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
uint8_t *nnz;
for(i4x4=0; i4x4<4; i4x4++){
if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
@@ -986,7 +987,7 @@ decode_intra_mb:
for(i4x4=0; i4x4<4; i4x4++){
const int index= i4x4 + 4*i8x8;
- if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
+ if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
return -1;
}
}
@@ -1000,7 +1001,7 @@ decode_intra_mb:
if(cbp&0x30){
for(chroma_idx=0; chroma_idx<2; chroma_idx++)
- if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx)<<h->pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
+ if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
return -1;
}
}
@@ -1010,7 +1011,7 @@ decode_intra_mb:
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
for(i4x4=0; i4x4<4; i4x4++){
const int index= 16 + 4*chroma_idx + i4x4;
- if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan + 1, qmul, 15) < 0){
+ if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
return -1;
}
}
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 325fd3cc61..2e61a3110a 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -101,197 +101,92 @@ static const uint8_t tc0_table[52*3][4] = {
};
static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) {
- const int bit_depth = h->sps.bit_depth_luma;
- const int qp_bd_offset = 6*(bit_depth-8);
+ const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
- const int alpha = alpha_table[index_a] << (bit_depth-8);
- const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8);
+ const int alpha = alpha_table[index_a];
+ const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
if (alpha ==0 || beta == 0) return;
if( bS[0] < 4 ) {
int8_t tc[4];
- tc[0] = tc0_table[index_a][bS[0]] << (bit_depth-8);
- tc[1] = tc0_table[index_a][bS[1]] << (bit_depth-8);
- tc[2] = tc0_table[index_a][bS[2]] << (bit_depth-8);
- tc[3] = tc0_table[index_a][bS[3]] << (bit_depth-8);
+ tc[0] = tc0_table[index_a][bS[0]];
+ tc[1] = tc0_table[index_a][bS[1]];
+ tc[2] = tc0_table[index_a][bS[2]];
+ tc[3] = tc0_table[index_a][bS[3]];
h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
} else {
h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
}
}
static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
- const int bit_depth = h->sps.bit_depth_luma;
- const int qp_bd_offset = 6*(bit_depth-8);
+ const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
- const int alpha = alpha_table[index_a] << (bit_depth-8);
- const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8);
+ const int alpha = alpha_table[index_a];
+ const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
if (alpha ==0 || beta == 0) return;
if( bS[0] < 4 ) {
int8_t tc[4];
- tc[0] = (tc0_table[index_a][bS[0]] << (bit_depth-8))+1;
- tc[1] = (tc0_table[index_a][bS[1]] << (bit_depth-8))+1;
- tc[2] = (tc0_table[index_a][bS[2]] << (bit_depth-8))+1;
- tc[3] = (tc0_table[index_a][bS[3]] << (bit_depth-8))+1;
+ tc[0] = tc0_table[index_a][bS[0]]+1;
+ tc[1] = tc0_table[index_a][bS[1]]+1;
+ tc[2] = tc0_table[index_a][bS[2]]+1;
+ tc[3] = tc0_table[index_a][bS[3]]+1;
h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else {
h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
}
}
-static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) {
- int i;
- const int bit_depth = h->sps.bit_depth_luma;
- const int qp_bd_offset = 6*(bit_depth-8);
+static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
+ const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
- int alpha = alpha_table[index_a] << (bit_depth-8);
- int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8);
- for( i = 0; i < 8; i++, pix += stride) {
- const int bS_index = (i >> 1) * bsi;
-
- if( bS[bS_index] == 0 ) {
- continue;
- }
-
- if( bS[bS_index] < 4 ) {
- const int tc0 = tc0_table[index_a][bS[bS_index]] << (bit_depth-8);
- const int p0 = pix[-1];
- const int p1 = pix[-2];
- const int p2 = pix[-3];
- const int q0 = pix[0];
- const int q1 = pix[1];
- const int q2 = pix[2];
-
- if( FFABS( p0 - q0 ) < alpha &&
- FFABS( p1 - p0 ) < beta &&
- FFABS( q1 - q0 ) < beta ) {
- int tc = tc0;
- int i_delta;
-
- if( FFABS( p2 - p0 ) < beta ) {
- if(tc0)
- pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
- tc++;
- }
- if( FFABS( q2 - q0 ) < beta ) {
- if(tc0)
- pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
- tc++;
- }
-
- i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
- pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
- pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
- tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
- }
- }else{
- const int p0 = pix[-1];
- const int p1 = pix[-2];
- const int p2 = pix[-3];
-
- const int q0 = pix[0];
- const int q1 = pix[1];
- const int q2 = pix[2];
-
- if( FFABS( p0 - q0 ) < alpha &&
- FFABS( p1 - p0 ) < beta &&
- FFABS( q1 - q0 ) < beta ) {
+ int alpha = alpha_table[index_a];
+ int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
+ if (alpha ==0 || beta == 0) return;
- if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
- if( FFABS( p2 - p0 ) < beta)
- {
- const int p3 = pix[-4];
- /* p0', p1', p2' */
- pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
- pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
- pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
- } else {
- /* p0' */
- pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- }
- if( FFABS( q2 - q0 ) < beta)
- {
- const int q3 = pix[3];
- /* q0', q1', q2' */
- pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
- pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
- pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
- } else {
- /* q0' */
- pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- }
- }else{
- /* p0', q0' */
- pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- }
- tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
- }
- }
+ if( bS[0] < 4 ) {
+ int8_t tc[4];
+ tc[0] = tc0_table[index_a][bS[0*bsi]];
+ tc[1] = tc0_table[index_a][bS[1*bsi]];
+ tc[2] = tc0_table[index_a][bS[2*bsi]];
+ tc[3] = tc0_table[index_a][bS[3*bsi]];
+ h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
+ } else {
+ h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
}
}
-static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) {
- int i;
- const int bit_depth = h->sps.bit_depth_luma;
- const int qp_bd_offset = 6*(bit_depth-8);
+static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
+ const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
- int alpha = alpha_table[index_a] << (bit_depth-8);
- int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8);
- for( i = 0; i < 4; i++, pix += stride) {
- const int bS_index = i*bsi;
-
- if( bS[bS_index] == 0 ) {
- continue;
- }
+ int alpha = alpha_table[index_a];
+ int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
+ if (alpha ==0 || beta == 0) return;
- if( bS[bS_index] < 4 ) {
- const int tc = (tc0_table[index_a][bS[bS_index]] << (bit_depth-8)) + 1;
- const int p0 = pix[-1];
- const int p1 = pix[-2];
- const int q0 = pix[0];
- const int q1 = pix[1];
-
- if( FFABS( p0 - q0 ) < alpha &&
- FFABS( p1 - p0 ) < beta &&
- FFABS( q1 - q0 ) < beta ) {
- const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-
- pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
- pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
- tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
- }
- }else{
- const int p0 = pix[-1];
- const int p1 = pix[-2];
- const int q0 = pix[0];
- const int q1 = pix[1];
-
- if( FFABS( p0 - q0 ) < alpha &&
- FFABS( p1 - p0 ) < beta &&
- FFABS( q1 - q0 ) < beta ) {
-
- pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
- pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
- tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
- }
- }
+ if( bS[0] < 4 ) {
+ int8_t tc[4];
+ tc[0] = tc0_table[index_a][bS[0*bsi]] + 1;
+ tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
+ tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
+ tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
+ h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
+ } else {
+ h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
}
}
static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
- const int bit_depth = h->sps.bit_depth_luma;
- const int qp_bd_offset = 6*(bit_depth-8);
+ const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
- const int alpha = alpha_table[index_a] << (bit_depth-8);
- const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8);
+ const int alpha = alpha_table[index_a];
+ const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
if (alpha ==0 || beta == 0) return;
if( bS[0] < 4 ) {
int8_t tc[4];
- tc[0] = tc0_table[index_a][bS[0]] << (bit_depth-8);
- tc[1] = tc0_table[index_a][bS[1]] << (bit_depth-8);
- tc[2] = tc0_table[index_a][bS[2]] << (bit_depth-8);
- tc[3] = tc0_table[index_a][bS[3]] << (bit_depth-8);
+ tc[0] = tc0_table[index_a][bS[0]];
+ tc[1] = tc0_table[index_a][bS[1]];
+ tc[2] = tc0_table[index_a][bS[2]];
+ tc[3] = tc0_table[index_a][bS[3]];
h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
} else {
h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
@@ -299,19 +194,18 @@ static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t
}
static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
- const int bit_depth = h->sps.bit_depth_luma;
- const int qp_bd_offset = 6*(bit_depth-8);
+ const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
- const int alpha = alpha_table[index_a] << (bit_depth-8);
- const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset] << (bit_depth-8);
+ const int alpha = alpha_table[index_a];
+ const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
if (alpha ==0 || beta == 0) return;
if( bS[0] < 4 ) {
int8_t tc[4];
- tc[0] = (tc0_table[index_a][bS[0]] << (bit_depth-8))+1;
- tc[1] = (tc0_table[index_a][bS[1]] << (bit_depth-8))+1;
- tc[2] = (tc0_table[index_a][bS[2]] << (bit_depth-8))+1;
- tc[3] = (tc0_table[index_a][bS[3]] << (bit_depth-8))+1;
+ tc[0] = tc0_table[index_a][bS[0]]+1;
+ tc[1] = tc0_table[index_a][bS[1]]+1;
+ tc[2] = tc0_table[index_a][bS[2]]+1;
+ tc[3] = tc0_table[index_a][bS[3]]+1;
h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else {
h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
@@ -650,10 +544,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
//{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
if( dir == 0 ) {
- filter_mb_edgev( &img_y[4*edge<<h->pixel_shift], linesize, bS, qp, h );
+ filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
if( (edge&1) == 0 ) {
- filter_mb_edgecv( &img_cb[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
- filter_mb_edgecv( &img_cr[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
+ filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+ filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
}
} else {
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index ab20ecfeb7..f77a013112 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -70,7 +70,7 @@ static const AVRational pixel_aspect[17]={
QP(37,d), QP(37,d), QP(37,d), QP(38,d), QP(38,d), QP(38,d),\
QP(39,d), QP(39,d), QP(39,d), QP(39,d)
-const uint8_t ff_h264_chroma_qp[3][QP_MAX_MAX+1] = {
+const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1] = {
{
CHROMA_QP_TABLE_END(8)
},
diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c
index a2058b5aec..9554201522 100644
--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c
@@ -629,8 +629,9 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
* short_ref and long_ref buffers.
*/
av_log(h->s.avctx, AV_LOG_ERROR,
- "number of reference frames exceeds max (probably "
- "corrupt input), discarding one long:%d short:%d max:%d\n", h->long_ref_count, h->short_ref_count, h->sps.ref_frame_count);
+ "number of reference frames (%d+%d) exceeds max (%d; probably "
+ "corrupt input), discarding one\n",
+ h->long_ref_count, h->short_ref_count, h->sps.ref_frame_count);
if (h->long_ref_count && !h->short_ref_count) {
for (i = 0; i < 16; ++i)
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 04c6ea6df4..96a38ff77d 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -30,15 +30,15 @@
#include "h264dsp.h"
#define BIT_DEPTH 8
-#include "h264dsp_internal.h"
+#include "h264dsp_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 9
-#include "h264dsp_internal.h"
+#include "h264dsp_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 10
-#include "h264dsp_internal.h"
+#include "h264dsp_template.c"
#undef BIT_DEPTH
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
@@ -47,58 +47,62 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
#define FUNC(a, depth) a ## _ ## depth ## _c
#define H264_DSP(depth) \
- c->h264_idct_add = FUNC(ff_h264_idct_add , depth);\
- c->h264_idct8_add = FUNC(ff_h264_idct8_add , depth);\
- c->h264_idct_dc_add = FUNC(ff_h264_idct_dc_add , depth);\
- c->h264_idct8_dc_add = FUNC(ff_h264_idct8_dc_add , depth);\
- c->h264_idct_add16 = FUNC(ff_h264_idct_add16 , depth);\
- c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4 , depth);\
- c->h264_idct_add8 = FUNC(ff_h264_idct_add8 , depth);\
- c->h264_idct_add16intra = FUNC(ff_h264_idct_add16intra , depth);\
- c->h264_luma_dc_dequant_idct = FUNC(ff_h264_luma_dc_dequant_idct , depth);\
- c->h264_chroma_dc_dequant_idct = FUNC(ff_h264_chroma_dc_dequant_idct , depth);\
+ c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
+ c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
+ c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
+ c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
+ c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\
+ c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\
+ c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\
+ c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
+ c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
+ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
\
- c->weight_h264_pixels_tab[0] = FUNC( weight_h264_pixels16x16 , depth);\
- c->weight_h264_pixels_tab[1] = FUNC( weight_h264_pixels16x8 , depth);\
- c->weight_h264_pixels_tab[2] = FUNC( weight_h264_pixels8x16 , depth);\
- c->weight_h264_pixels_tab[3] = FUNC( weight_h264_pixels8x8 , depth);\
- c->weight_h264_pixels_tab[4] = FUNC( weight_h264_pixels8x4 , depth);\
- c->weight_h264_pixels_tab[5] = FUNC( weight_h264_pixels4x8 , depth);\
- c->weight_h264_pixels_tab[6] = FUNC( weight_h264_pixels4x4 , depth);\
- c->weight_h264_pixels_tab[7] = FUNC( weight_h264_pixels4x2 , depth);\
- c->weight_h264_pixels_tab[8] = FUNC( weight_h264_pixels2x4 , depth);\
- c->weight_h264_pixels_tab[9] = FUNC( weight_h264_pixels2x2 , depth);\
- c->biweight_h264_pixels_tab[0] = FUNC(biweight_h264_pixels16x16 , depth);\
- c->biweight_h264_pixels_tab[1] = FUNC(biweight_h264_pixels16x8 , depth);\
- c->biweight_h264_pixels_tab[2] = FUNC(biweight_h264_pixels8x16 , depth);\
- c->biweight_h264_pixels_tab[3] = FUNC(biweight_h264_pixels8x8 , depth);\
- c->biweight_h264_pixels_tab[4] = FUNC(biweight_h264_pixels8x4 , depth);\
- c->biweight_h264_pixels_tab[5] = FUNC(biweight_h264_pixels4x8 , depth);\
- c->biweight_h264_pixels_tab[6] = FUNC(biweight_h264_pixels4x4 , depth);\
- c->biweight_h264_pixels_tab[7] = FUNC(biweight_h264_pixels4x2 , depth);\
- c->biweight_h264_pixels_tab[8] = FUNC(biweight_h264_pixels2x4 , depth);\
- c->biweight_h264_pixels_tab[9] = FUNC(biweight_h264_pixels2x2 , depth);\
+ c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
+ c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
+ c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
+ c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
+ c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
+ c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
+ c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
+ c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
+ c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
+ c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
+ c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
+ c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
+ c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
+ c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
+ c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
+ c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
+ c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
+ c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
+ c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
+ c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
\
- c->h264_v_loop_filter_luma = FUNC(h264_v_loop_filter_luma , depth);\
- c->h264_h_loop_filter_luma = FUNC(h264_h_loop_filter_luma , depth);\
- c->h264_v_loop_filter_luma_intra = FUNC(h264_v_loop_filter_luma_intra , depth);\
- c->h264_h_loop_filter_luma_intra = FUNC(h264_h_loop_filter_luma_intra , depth);\
- c->h264_v_loop_filter_chroma = FUNC(h264_v_loop_filter_chroma , depth);\
- c->h264_h_loop_filter_chroma = FUNC(h264_h_loop_filter_chroma , depth);\
- c->h264_v_loop_filter_chroma_intra = FUNC(h264_v_loop_filter_chroma_intra, depth);\
- c->h264_h_loop_filter_chroma_intra = FUNC(h264_h_loop_filter_chroma_intra, depth);\
+ c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
+ c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
+ c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\
+ c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\
+ c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
+ c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
+ c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
+ c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
+ c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
+ c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
+ c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
+ c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
c->h264_loop_filter_strength= NULL;
switch (bit_depth) {
- case 9:
- H264_DSP(9);
- break;
- case 10:
- H264_DSP(10);
- break;
- default:
- H264_DSP(8);
- break;
+ case 9:
+ H264_DSP(9);
+ break;
+ case 10:
+ H264_DSP(10);
+ break;
+ default:
+ H264_DSP(8);
+ break;
}
if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth);
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 8a0b9ae72b..4b606efa17 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -45,13 +45,17 @@ typedef struct H264DSPContext{
/* loop filter */
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
+ void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
/* v/h_loop_filter_luma_intra: align 16 */
void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+ void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta);
void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
+ void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
+ void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
// h264_loop_filter_strength: simd only. the C version is inlined in h264.c
void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
new file mode 100644
index 0000000000..91162ea900
--- /dev/null
+++ b/libavcodec/h264dsp_template.c
@@ -0,0 +1,313 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG4 part10 DSP functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "high_bit_depth.h"
+
+#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
+#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
+#define H264_WEIGHT(W,H) \
+static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *_block, int stride, int log2_denom, int weight, int offset){ \
+ int y; \
+ pixel *block = (pixel*)_block; \
+ stride /= sizeof(pixel); \
+ offset <<= (log2_denom + (BIT_DEPTH-8)); \
+ if(log2_denom) offset += 1<<(log2_denom-1); \
+ for(y=0; y<H; y++, block += stride){ \
+ op_scale1(0); \
+ op_scale1(1); \
+ if(W==2) continue; \
+ op_scale1(2); \
+ op_scale1(3); \
+ if(W==4) continue; \
+ op_scale1(4); \
+ op_scale1(5); \
+ op_scale1(6); \
+ op_scale1(7); \
+ if(W==8) continue; \
+ op_scale1(8); \
+ op_scale1(9); \
+ op_scale1(10); \
+ op_scale1(11); \
+ op_scale1(12); \
+ op_scale1(13); \
+ op_scale1(14); \
+ op_scale1(15); \
+ } \
+} \
+static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+ int y; \
+ pixel *dst = (pixel*)_dst; \
+ pixel *src = (pixel*)_src; \
+ stride /= sizeof(pixel); \
+ offset = ((offset + 1) | 1) << log2_denom; \
+ for(y=0; y<H; y++, dst += stride, src += stride){ \
+ op_scale2(0); \
+ op_scale2(1); \
+ if(W==2) continue; \
+ op_scale2(2); \
+ op_scale2(3); \
+ if(W==4) continue; \
+ op_scale2(4); \
+ op_scale2(5); \
+ op_scale2(6); \
+ op_scale2(7); \
+ if(W==8) continue; \
+ op_scale2(8); \
+ op_scale2(9); \
+ op_scale2(10); \
+ op_scale2(11); \
+ op_scale2(12); \
+ op_scale2(13); \
+ op_scale2(14); \
+ op_scale2(15); \
+ } \
+}
+
+H264_WEIGHT(16,16)
+H264_WEIGHT(16,8)
+H264_WEIGHT(8,16)
+H264_WEIGHT(8,8)
+H264_WEIGHT(8,4)
+H264_WEIGHT(4,8)
+H264_WEIGHT(4,4)
+H264_WEIGHT(4,2)
+H264_WEIGHT(2,4)
+H264_WEIGHT(2,2)
+
+#undef op_scale1
+#undef op_scale2
+#undef H264_WEIGHT
+
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+{
+ pixel *pix = (pixel*)_pix;
+ int i, d;
+ xstride /= sizeof(pixel);
+ ystride /= sizeof(pixel);
+ alpha <<= BIT_DEPTH - 8;
+ beta <<= BIT_DEPTH - 8;
+ for( i = 0; i < 4; i++ ) {
+ const int tc_orig = tc0[i] << (BIT_DEPTH - 8);
+ if( tc_orig < 0 ) {
+ pix += inner_iters*ystride;
+ continue;
+ }
+ for( d = 0; d < inner_iters; d++ ) {
+ const int p0 = pix[-1*xstride];
+ const int p1 = pix[-2*xstride];
+ const int p2 = pix[-3*xstride];
+ const int q0 = pix[0];
+ const int q1 = pix[1*xstride];
+ const int q2 = pix[2*xstride];
+
+ if( FFABS( p0 - q0 ) < alpha &&
+ FFABS( p1 - p0 ) < beta &&
+ FFABS( q1 - q0 ) < beta ) {
+
+ int tc = tc_orig;
+ int i_delta;
+
+ if( FFABS( p2 - p0 ) < beta ) {
+ if(tc_orig)
+ pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig );
+ tc++;
+ }
+ if( FFABS( q2 - q0 ) < beta ) {
+ if(tc_orig)
+ pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig );
+ tc++;
+ }
+
+ i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+ pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */
+ pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */
+ }
+ pix += ystride;
+ }
+ }
+}
+static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0);
+}
+static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
+}
+static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
+}
+
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+{
+ pixel *pix = (pixel*)_pix;
+ int d;
+ xstride /= sizeof(pixel);
+ ystride /= sizeof(pixel);
+ alpha <<= BIT_DEPTH - 8;
+ beta <<= BIT_DEPTH - 8;
+ for( d = 0; d < 4 * inner_iters; d++ ) {
+ const int p2 = pix[-3*xstride];
+ const int p1 = pix[-2*xstride];
+ const int p0 = pix[-1*xstride];
+
+ const int q0 = pix[ 0*xstride];
+ const int q1 = pix[ 1*xstride];
+ const int q2 = pix[ 2*xstride];
+
+ if( FFABS( p0 - q0 ) < alpha &&
+ FFABS( p1 - p0 ) < beta &&
+ FFABS( q1 - q0 ) < beta ) {
+
+ if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+ if( FFABS( p2 - p0 ) < beta)
+ {
+ const int p3 = pix[-4*xstride];
+ /* p0', p1', p2' */
+ pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+ pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+ pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+ } else {
+ /* p0' */
+ pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+ }
+ if( FFABS( q2 - q0 ) < beta)
+ {
+ const int q3 = pix[3*xstride];
+ /* q0', q1', q2' */
+ pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+ pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+ pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+ } else {
+ /* q0' */
+ pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+ }
+ }else{
+ /* p0', q0' */
+ pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+ pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+ }
+ }
+ pix += ystride;
+ }
+}
+static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+{
+ FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta);
+}
+static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+{
+ FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
+}
+static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
+{
+ FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
+}
+
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+{
+ pixel *pix = (pixel*)_pix;
+ int i, d;
+ xstride /= sizeof(pixel);
+ ystride /= sizeof(pixel);
+ alpha <<= BIT_DEPTH - 8;
+ beta <<= BIT_DEPTH - 8;
+ for( i = 0; i < 4; i++ ) {
+ const int tc = ((tc0[i] - 1) << (BIT_DEPTH - 8)) + 1;
+ if( tc <= 0 ) {
+ pix += inner_iters*ystride;
+ continue;
+ }
+ for( d = 0; d < inner_iters; d++ ) {
+ const int p0 = pix[-1*xstride];
+ const int p1 = pix[-2*xstride];
+ const int q0 = pix[0];
+ const int q1 = pix[1*xstride];
+
+ if( FFABS( p0 - q0 ) < alpha &&
+ FFABS( p1 - p0 ) < beta &&
+ FFABS( q1 - q0 ) < beta ) {
+
+ int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+
+ pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */
+ pix[0] = av_clip_pixel( q0 - delta ); /* q0' */
+ }
+ pix += ystride;
+ }
+ }
+}
+static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0);
+}
+static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
+}
+static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
+}
+
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+{
+ pixel *pix = (pixel*)_pix;
+ int d;
+ xstride /= sizeof(pixel);
+ ystride /= sizeof(pixel);
+ alpha <<= BIT_DEPTH - 8;
+ beta <<= BIT_DEPTH - 8;
+ for( d = 0; d < 4 * inner_iters; d++ ) {
+ const int p0 = pix[-1*xstride];
+ const int p1 = pix[-2*xstride];
+ const int q0 = pix[0];
+ const int q1 = pix[1*xstride];
+
+ if( FFABS( p0 - q0 ) < alpha &&
+ FFABS( p1 - p0 ) < beta &&
+ FFABS( q1 - q0 ) < beta ) {
+
+ pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
+ pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
+ }
+ pix += ystride;
+ }
+}
+static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+{
+ FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta);
+}
+static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+{
+ FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
+}
+static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
+{
+ FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
+}
diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index 920356d01f..7d1ee007bc 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -26,13 +26,13 @@
*/
#define BIT_DEPTH 8
-#include "h264idct_internal.h"
+#include "h264idct_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 9
-#include "h264idct_internal.h"
+#include "h264idct_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 10
-#include "h264idct_internal.h"
+#include "h264idct_template.c"
#undef BIT_DEPTH
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
new file mode 100644
index 0000000000..39c9a1c9eb
--- /dev/null
+++ b/libavcodec/h264idct_template.c
@@ -0,0 +1,291 @@
+/*
+ * H.264 IDCT
+ * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 IDCT.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "high_bit_depth.h"
+
+#ifndef AVCODEC_H264IDCT_INTERNAL_H
+#define AVCODEC_H264IDCT_INTERNAL_H
+/* Guard needed because this template is #included once per bit depth, but
+ * the table below must be defined only once per translation unit. */
+//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from h264 have been split
+/* Maps a raster block index (0-15 luma, 16-23 chroma) to its position in the
+ * decoder's 8-wide "scan8" non-zero-count cache layout. */
+static const uint8_t scan8[16 + 2*4]={
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
+};
+#endif
+
+/* Core 4x4 H.264 inverse transform: butterfly pass over columns (in place),
+ * then over rows, writing into dst. 'block_stride' is the coefficient row
+ * pitch (4 for the normal layout, 8 for lowres); 'shift' is the final
+ * down-shift; with add=1 the result is accumulated into dst, with add=0
+ * (add*dst == 0) dst is overwritten. */
+static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
+ int i;
+ INIT_CLIP
+ pixel *dst = (pixel*)_dst;
+ dctcoef *block = (dctcoef*)_block;
+ stride /= sizeof(pixel);
+
+ block[0] += 1<<(shift-1); /* rounding bias folded into the DC term */
+
+ /* first pass: transform each of the 4 columns in place */
+ for(i=0; i<4; i++){
+ const int z0= block[i + block_stride*0] + block[i + block_stride*2];
+ const int z1= block[i + block_stride*0] - block[i + block_stride*2];
+ const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3];
+ const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1);
+
+ block[i + block_stride*0]= z0 + z3;
+ block[i + block_stride*1]= z1 + z2;
+ block[i + block_stride*2]= z1 - z2;
+ block[i + block_stride*3]= z0 - z3;
+ }
+
+ /* second pass: transform each row and write the clipped result to dst */
+ for(i=0; i<4; i++){
+ const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
+ const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
+ const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
+ const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
+
+ dst[i + 0*stride]= CLIP(add*dst[i + 0*stride] + ((z0 + z3) >> shift));
+ dst[i + 1*stride]= CLIP(add*dst[i + 1*stride] + ((z1 + z2) >> shift));
+ dst[i + 2*stride]= CLIP(add*dst[i + 2*stride] + ((z1 - z2) >> shift));
+ dst[i + 3*stride]= CLIP(add*dst[i + 3*stride] + ((z0 - z3) >> shift));
+ }
+}
+
+/* Standard 4x4 IDCT+add: 4-wide coefficient rows, final >>6 shift. */
+void FUNCC(ff_h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride){
+ FUNCC(idct_internal)(dst, block, stride, 4, 6, 1);
+}
+
+/* Low-resolution variants: 8-wide coefficient rows, >>3 shift;
+ * _add accumulates into dst, _put overwrites it. */
+void FUNCC(ff_h264_lowres_idct_add)(uint8_t *dst, int stride, DCTELEM *block){
+ FUNCC(idct_internal)(dst, block, stride, 8, 3, 1);
+}
+
+void FUNCC(ff_h264_lowres_idct_put)(uint8_t *dst, int stride, DCTELEM *block){
+ FUNCC(idct_internal)(dst, block, stride, 8, 3, 0);
+}
+
+/* 8x8 inverse transform + add. Same two-pass structure as the 4x4 case:
+ * column butterflies in place, then row butterflies writing clipped sums
+ * into dst with a final >>6 (the +32 on the DC is the matching rounding
+ * bias). Coefficients are laid out with an 8-wide row pitch. */
+void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
+ int i;
+ INIT_CLIP
+ pixel *dst = (pixel*)_dst;
+ dctcoef *block = (dctcoef*)_block;
+ stride /= sizeof(pixel);
+
+ block[0] += 32;
+
+ /* first pass: transform each of the 8 columns in place */
+ for( i = 0; i < 8; i++ )
+ {
+ const int a0 = block[i+0*8] + block[i+4*8];
+ const int a2 = block[i+0*8] - block[i+4*8];
+ const int a4 = (block[i+2*8]>>1) - block[i+6*8];
+ const int a6 = (block[i+6*8]>>1) + block[i+2*8];
+
+ const int b0 = a0 + a6;
+ const int b2 = a2 + a4;
+ const int b4 = a2 - a4;
+ const int b6 = a0 - a6;
+
+ const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
+ const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
+ const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
+ const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
+
+ const int b1 = (a7>>2) + a1;
+ const int b3 = a3 + (a5>>2);
+ const int b5 = (a3>>2) - a5;
+ const int b7 = a7 - (a1>>2);
+
+ block[i+0*8] = b0 + b7;
+ block[i+7*8] = b0 - b7;
+ block[i+1*8] = b2 + b5;
+ block[i+6*8] = b2 - b5;
+ block[i+2*8] = b4 + b3;
+ block[i+5*8] = b4 - b3;
+ block[i+3*8] = b6 + b1;
+ block[i+4*8] = b6 - b1;
+ }
+ /* second pass: transform each row and accumulate into dst */
+ for( i = 0; i < 8; i++ )
+ {
+ const int a0 = block[0+i*8] + block[4+i*8];
+ const int a2 = block[0+i*8] - block[4+i*8];
+ const int a4 = (block[2+i*8]>>1) - block[6+i*8];
+ const int a6 = (block[6+i*8]>>1) + block[2+i*8];
+
+ const int b0 = a0 + a6;
+ const int b2 = a2 + a4;
+ const int b4 = a2 - a4;
+ const int b6 = a0 - a6;
+
+ const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
+ const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
+ const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
+ const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
+
+ const int b1 = (a7>>2) + a1;
+ const int b3 = a3 + (a5>>2);
+ const int b5 = (a3>>2) - a5;
+ const int b7 = a7 - (a1>>2);
+
+ dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) );
+ dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) );
+ dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) );
+ dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) );
+ dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) );
+ dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) );
+ dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) );
+ dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) );
+ }
+}
+
+// assumes all AC coefs are 0
+/* DC-only 4x4 shortcut: add the single dequantized DC value (rounded, >>6)
+ * to every sample of the 4x4 destination block. */
+void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
+ int i, j;
+ int dc = (((dctcoef*)block)[0] + 32) >> 6;
+ INIT_CLIP
+ pixel *dst = (pixel*)_dst;
+ stride /= sizeof(pixel);
+ for( j = 0; j < 4; j++ )
+ {
+ for( i = 0; i < 4; i++ )
+ dst[i] = CLIP( dst[i] + dc );
+ dst += stride;
+ }
+}
+
+/* Same DC-only shortcut for an 8x8 block. */
+void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
+ int i, j;
+ int dc = (((dctcoef*)block)[0] + 32) >> 6;
+ INIT_CLIP
+ pixel *dst = (pixel*)_dst;
+ stride /= sizeof(pixel);
+ for( j = 0; j < 8; j++ )
+ {
+ for( i = 0; i < 8; i++ )
+ dst[i] = CLIP( dst[i] + dc );
+ dst += stride;
+ }
+}
+
+/* Run the 4x4 IDCT over the 16 luma blocks of a macroblock, skipping blocks
+ * with no coded coefficients; nnz==1 with a nonzero DC takes the DC-only
+ * fast path. The 'i*16*sizeof(pixel)' offsets are in DCTELEM units: dctcoef
+ * is twice as wide as DCTELEM when pixel is 16-bit (see high_bit_depth.h),
+ * so the byte distance per block scales with sizeof(pixel). */
+void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
+ else FUNCC(idct_internal )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
+ }
+ }
+}
+
+/* Intra-16x16 variant: any coded coefficients force the full IDCT; blocks
+ * with only a (predicted) nonzero DC still get the DC-only add. */
+void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ if(nnzc[ scan8[i] ]) FUNCC(idct_internal )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1);
+ else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
+ }
+}
+
+/* 8x8-transform path: four 8x8 luma blocks per macroblock (i += 4), with the
+ * same DC-only fast path as add16. */
+void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=4){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
+ else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride);
+ }
+ }
+}
+
+/* Chroma blocks 16..23; (i&4)>>2 selects which of the two chroma plane
+ * pointers in dest[] receives the block. */
+void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=16; i<16+8; i++){
+ if(nnzc[ scan8[i] ])
+ FUNCC(ff_h264_idct_add )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
+ else if(((dctcoef*)block)[i*16])
+ FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride);
+ }
+}
+/**
+ * IDCT transforms the 16 dc values and dequantizes them.
+ * The inverse Hadamard runs over rows of _input, then columns; results are
+ * scattered into _output at the DC positions of the 16 4x4 luma blocks
+ * (rows 0,1,4,5 combined with x_offset give the 4x4 block layout).
+ * @param qp quantization parameter
+ */
+void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int qmul){
+#define stride 16
+ int i;
+ int temp[16];
+ static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
+ dctcoef *input = (dctcoef*)_input;
+ dctcoef *output = (dctcoef*)_output;
+
+ /* horizontal butterflies */
+ for(i=0; i<4; i++){
+ const int z0= input[4*i+0] + input[4*i+1];
+ const int z1= input[4*i+0] - input[4*i+1];
+ const int z2= input[4*i+2] - input[4*i+3];
+ const int z3= input[4*i+2] + input[4*i+3];
+
+ temp[4*i+0]= z0+z3;
+ temp[4*i+1]= z0-z3;
+ temp[4*i+2]= z1-z2;
+ temp[4*i+3]= z1+z2;
+ }
+
+ /* vertical butterflies + dequantization (x*qmul + 128) >> 8 */
+ for(i=0; i<4; i++){
+ const int offset= x_offset[i];
+ const int z0= temp[4*0+i] + temp[4*2+i];
+ const int z1= temp[4*0+i] - temp[4*2+i];
+ const int z2= temp[4*1+i] - temp[4*3+i];
+ const int z3= temp[4*1+i] + temp[4*3+i];
+
+ output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8));
+ output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
+ output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
+ output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
+ }
+#undef stride
+}
+
+/* 2x2 chroma DC inverse Hadamard transform + dequantization, performed in
+ * place on the four DC coefficients (spaced at the chroma block positions
+ * within the coefficient buffer). */
+void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
+ const int stride= 16*2;
+ const int xStride= 16;
+ int a,b,c,d,e;
+ dctcoef *block = (dctcoef*)_block;
+
+ a= block[stride*0 + xStride*0];
+ b= block[stride*0 + xStride*1];
+ c= block[stride*1 + xStride*0];
+ d= block[stride*1 + xStride*1];
+
+ /* 2x2 butterfly: sums/differences along each axis */
+ e= a-b;
+ a= a+b;
+ b= c-d;
+ c= c+d;
+
+ block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
+ block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
+ block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
+ block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
+}
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 8d2c6f0355..f6533cf9ba 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -28,17 +28,338 @@
#include "h264pred.h"
#define BIT_DEPTH 8
-#include "h264pred_internal.h"
+#include "h264pred_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 9
-#include "h264pred_internal.h"
+#include "h264pred_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 10
-#include "h264pred_internal.h"
+#include "h264pred_template.c"
#undef BIT_DEPTH
+/* VP8 4x4 vertical prediction: the top edge is smoothed with a 1-2-1 filter
+ * (using the top-left and top-right neighbours) and the resulting 4-pixel
+ * word is replicated into all four rows. 8-bit only, hence not templated. */
+static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+ const int lt= src[-1-1*stride];
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+ uint32_t v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
+ (t0 + 2*t1 + t2 + 2) >> 2,
+ (t1 + 2*t2 + t3 + 2) >> 2,
+ (t2 + 2*t3 + t4 + 2) >> 2);
+
+ AV_WN32A(src+0*stride, v);
+ AV_WN32A(src+1*stride, v);
+ AV_WN32A(src+2*stride, v);
+ AV_WN32A(src+3*stride, v);
+}
+
+/* VP8 4x4 horizontal prediction: each row is filled with a 1-2-1 filtered
+ * left-edge sample, splatted across the row via the 0x01010101 multiply. */
+static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+ const int lt= src[-1-1*stride];
+ LOAD_LEFT_EDGE
+
+ AV_WN32A(src+0*stride, ((lt + 2*l0 + l1 + 2) >> 2)*0x01010101);
+ AV_WN32A(src+1*stride, ((l0 + 2*l1 + l2 + 2) >> 2)*0x01010101);
+ AV_WN32A(src+2*stride, ((l1 + 2*l2 + l3 + 2) >> 2)*0x01010101);
+ AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
+}
+
+/* SVQ3 down-left prediction: simple (left+top)/2 averages on anti-diagonals
+ * instead of the H.264 3-tap filter; every anti-diagonal gets one value.
+ * unu0/unu1 only consume t0/l0 so LOAD_* does not warn about unused vars. */
+static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_TOP_EDGE
+ LOAD_LEFT_EDGE
+ const av_unused int unu0= t0;
+ const av_unused int unu1= l0;
+
+ src[0+0*stride]=(l1 + t1)>>1;
+ src[1+0*stride]=
+ src[0+1*stride]=(l2 + t2)>>1;
+ src[2+0*stride]=
+ src[1+1*stride]=
+ src[0+2*stride]=
+ src[3+0*stride]=
+ src[2+1*stride]=
+ src[1+2*stride]=
+ src[0+3*stride]=
+ src[3+1*stride]=
+ src[2+2*stride]=
+ src[1+3*stride]=
+ src[3+2*stride]=
+ src[2+3*stride]=
+ src[3+3*stride]=(l3 + t3)>>1;
+}
+
+/* RV40 down-left prediction: averages the H.264-style 3-tap filtered top
+ * edge with an equally filtered left edge (including the below-left
+ * neighbours l4..l7) for each anti-diagonal. */
+static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+ LOAD_LEFT_EDGE
+ LOAD_DOWN_LEFT_EDGE
+
+ src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
+ src[1+0*stride]=
+ src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
+ src[2+0*stride]=
+ src[1+1*stride]=
+ src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3;
+ src[3+0*stride]=
+ src[2+1*stride]=
+ src[1+2*stride]=
+ src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3;
+ src[3+1*stride]=
+ src[2+2*stride]=
+ src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3;
+ src[3+2*stride]=
+ src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3;
+ src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
+}
+
+/* Same as above for when the below-left neighbours are unavailable: the
+ * left-edge contribution degenerates to replications of l3. */
+static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+ LOAD_LEFT_EDGE
+
+ src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
+ src[1+0*stride]=
+ src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
+ src[2+0*stride]=
+ src[1+1*stride]=
+ src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3;
+ src[3+0*stride]=
+ src[2+1*stride]=
+ src[1+2*stride]=
+ src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3;
+ src[3+1*stride]=
+ src[2+2*stride]=
+ src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3;
+ src[3+2*stride]=
+ src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3;
+ src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
+}
+
+/* Shared RV40 vertical-left predictor; the left-edge samples are passed as
+ * parameters so the two wrappers below can differ only in whether the
+ * below-left neighbour (l4) is real or a replication of l3. */
+static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride,
+ const int l0, const int l1, const int l2, const int l3, const int l4){
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3;
+ src[1+0*stride]=
+ src[0+2*stride]=(t1 + t2 + 1)>>1;
+ src[2+0*stride]=
+ src[1+2*stride]=(t2 + t3 + 1)>>1;
+ src[3+0*stride]=
+ src[2+2*stride]=(t3 + t4+ 1)>>1;
+ src[3+2*stride]=(t4 + t5+ 1)>>1;
+ src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3;
+ src[1+1*stride]=
+ src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+ src[2+1*stride]=
+ src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+ src[3+1*stride]=
+ src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+ src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+/* Below-left neighbours available: pass the real l4. */
+static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_LEFT_EDGE
+ LOAD_DOWN_LEFT_EDGE
+
+ pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
+}
+
+/* Below-left neighbours unavailable: replicate l3 in place of l4. */
+static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_LEFT_EDGE
+
+ pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
+}
+
+/* VP8 vertical-left prediction: like H.264 vertical-left but the bottom-right
+ * corner samples extend further into the top-right edge (t6, t7). */
+static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(t0 + t1 + 1)>>1;
+ src[1+0*stride]=
+ src[0+2*stride]=(t1 + t2 + 1)>>1;
+ src[2+0*stride]=
+ src[1+2*stride]=(t2 + t3 + 1)>>1;
+ src[3+0*stride]=
+ src[2+2*stride]=(t3 + t4 + 1)>>1;
+ src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+ src[1+1*stride]=
+ src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+ src[2+1*stride]=
+ src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+ src[3+1*stride]=
+ src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+ src[3+2*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+ src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
+}
+
+/* RV40 horizontal-up prediction: blends filtered top/top-right samples with
+ * filtered left-edge samples, using the below-left neighbours (l4..l6) for
+ * the bottom rows. */
+static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_LEFT_EDGE
+ LOAD_DOWN_LEFT_EDGE
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+ src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+ src[2+0*stride]=
+ src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+ src[3+0*stride]=
+ src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+ src[2+1*stride]=
+ src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+ src[3+1*stride]=
+ src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+ src[3+2*stride]=
+ src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2;
+ src[0+3*stride]=
+ src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2;
+ src[2+3*stride]=(l4 + l5 + 1)>>1;
+ src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
+}
+
+/* Same as above with the below-left neighbours unavailable: l4.. collapse
+ * to replications of l3. */
+static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+ LOAD_LEFT_EDGE
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+ src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+ src[2+0*stride]=
+ src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+ src[3+0*stride]=
+ src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+ src[2+1*stride]=
+ src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+ src[3+1*stride]=
+ src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+ src[3+2*stride]=
+ src[1+3*stride]=l3;
+ src[0+3*stride]=
+ src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2;
+ src[2+3*stride]=
+ src[3+3*stride]=l3;
+}
+
+/* VP8 TrueMotion 4x4: each sample is clip(left + top - topleft). The clip
+ * is done via an offset into ff_cropTbl, pre-biased by -topleft so the
+ * inner loop is a pair of table lookups per pixel. */
+static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
+ uint8_t *top = src-stride;
+ int y;
+
+ for (y = 0; y < 4; y++) {
+ uint8_t *cm_in = cm + src[-1];
+ src[0] = cm_in[top[0]];
+ src[1] = cm_in[top[1]];
+ src[2] = cm_in[top[2]];
+ src[3] = cm_in[top[3]];
+ src += stride;
+ }
+}
+
+/* Codec-specific plane prediction: delegate to the 8-bit compat helper with
+ * the svq3/rv40 flags respectively. */
+static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
+ pred16x16_plane_compat_8_c(src, stride, 1, 0);
+}
+
+static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
+ pred16x16_plane_compat_8_c(src, stride, 0, 1);
+}
+
+/* VP8 TrueMotion 16x16: clip(left + top - topleft) per sample, using the
+ * same pre-biased crop-table trick as the 4x4 variant, fully unrolled per
+ * row. */
+static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
+ uint8_t *top = src-stride;
+ int y;
+
+ for (y = 0; y < 16; y++) {
+ uint8_t *cm_in = cm + src[-1];
+ src[0] = cm_in[top[0]];
+ src[1] = cm_in[top[1]];
+ src[2] = cm_in[top[2]];
+ src[3] = cm_in[top[3]];
+ src[4] = cm_in[top[4]];
+ src[5] = cm_in[top[5]];
+ src[6] = cm_in[top[6]];
+ src[7] = cm_in[top[7]];
+ src[8] = cm_in[top[8]];
+ src[9] = cm_in[top[9]];
+ src[10] = cm_in[top[10]];
+ src[11] = cm_in[top[11]];
+ src[12] = cm_in[top[12]];
+ src[13] = cm_in[top[13]];
+ src[14] = cm_in[top[14]];
+ src[15] = cm_in[top[15]];
+ src += stride;
+ }
+}
+
+/* RV40 8x8 left-DC: fill the block with the rounded mean of the 8 left
+ * neighbours, written as two 32-bit stores per row. */
+static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
+ int i;
+ int dc0;
+
+ dc0=0;
+ for(i=0;i<8; i++)
+ dc0+= src[-1+i*stride];
+ dc0= 0x01010101*((dc0 + 4)>>3);
+
+ for(i=0; i<8; i++){
+ ((uint32_t*)(src+i*stride))[0]=
+ ((uint32_t*)(src+i*stride))[1]= dc0;
+ }
+}
+
+/* RV40 8x8 top-DC: same as above using the 8 top neighbours. */
+static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
+ int i;
+ int dc0;
+
+ dc0=0;
+ for(i=0;i<8; i++)
+ dc0+= src[i-stride];
+ dc0= 0x01010101*((dc0 + 4)>>3);
+
+ for(i=0; i<8; i++){
+ ((uint32_t*)(src+i*stride))[0]=
+ ((uint32_t*)(src+i*stride))[1]= dc0;
+ }
+}
+
+/* RV40 8x8 DC: single mean of all 16 neighbours (8 top + 8 left, gathered
+ * in halves by the loop), unlike H.264's per-quadrant DC. */
+static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
+ int i;
+ int dc0=0;
+
+ for(i=0;i<4; i++){
+ dc0+= src[-1+i*stride] + src[i-stride];
+ dc0+= src[4+i-stride];
+ dc0+= src[-1+(i+4)*stride];
+ }
+ dc0= 0x01010101*((dc0 + 8)>>4);
+
+ for(i=0; i<4; i++){
+ ((uint32_t*)(src+i*stride))[0]= dc0;
+ ((uint32_t*)(src+i*stride))[1]= dc0;
+ }
+ for(i=4; i<8; i++){
+ ((uint32_t*)(src+i*stride))[0]= dc0;
+ ((uint32_t*)(src+i*stride))[1]= dc0;
+ }
+}
+
+/* VP8 TrueMotion 8x8: clip(left + top - topleft) per sample via the
+ * pre-biased crop table, unrolled 8 pixels per row. */
+static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
+ uint8_t *top = src-stride;
+ int y;
+
+ for (y = 0; y < 8; y++) {
+ uint8_t *cm_in = cm + src[-1];
+ src[0] = cm_in[top[0]];
+ src[1] = cm_in[top[1]];
+ src[2] = cm_in[top[2]];
+ src[3] = cm_in[top[3]];
+ src[4] = cm_in[top[4]];
+ src[5] = cm_in[top[5]];
+ src[6] = cm_in[top[6]];
+ src[7] = cm_in[top[7]];
+ src += stride;
+ }
+}
+
/**
* Set the intra prediction function pointers.
*/
@@ -49,26 +370,27 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
#undef FUNCC
#define FUNC(a, depth) a ## _ ## depth
#define FUNCC(a, depth) a ## _ ## depth ## _c
+#define FUNCD(a) a ## _c
#define H264_PRED(depth) \
if(codec_id != CODEC_ID_RV40){\
if(codec_id == CODEC_ID_VP8) {\
- h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical_vp8 , depth);\
- h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal_vp8 , depth);\
+ h->pred4x4[VERT_PRED ]= FUNCD(pred4x4_vertical_vp8);\
+ h->pred4x4[HOR_PRED ]= FUNCD(pred4x4_horizontal_vp8);\
} else {\
h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
}\
h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
if(codec_id == CODEC_ID_SVQ3)\
- h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left_svq3, depth);\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
else\
h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left , depth);\
h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
if (codec_id == CODEC_ID_VP8) {\
- h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left_vp8 , depth);\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_vp8);\
} else\
h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left , depth);\
h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up , depth);\
@@ -77,7 +399,7 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
} else {\
- h->pred4x4[TM_VP8_PRED ]= FUNCC(pred4x4_tm_vp8 , depth);\
+ h->pred4x4[TM_VP8_PRED ]= FUNCD(pred4x4_tm_vp8);\
h->pred4x4[DC_127_PRED ]= FUNCC(pred4x4_127_dc , depth);\
h->pred4x4[DC_129_PRED ]= FUNCC(pred4x4_129_dc , depth);\
h->pred4x4[VERT_VP8_PRED ]= FUNCC(pred4x4_vertical , depth);\
@@ -87,18 +409,18 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
- h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left_rv40 , depth);\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\
h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
- h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left_rv40, depth);\
- h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up_rv40, depth);\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_rv40);\
+ h->pred4x4[HOR_UP_PRED ]= FUNCD(pred4x4_horizontal_up_rv40);\
h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\
h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
- h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCC(pred4x4_down_left_rv40_nodown, depth);\
- h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCC(pred4x4_horizontal_up_rv40_nodown , depth);\
- h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCC(pred4x4_vertical_left_rv40_nodown , depth);\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\
+ h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\
+ h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\
}\
\
h->pred8x8l[VERT_PRED ]= FUNCC(pred8x8l_vertical , depth);\
@@ -119,7 +441,7 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
if (codec_id != CODEC_ID_VP8) {\
h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\
} else\
- h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_tm_vp8 , depth);\
+ h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\
@@ -129,9 +451,9 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
}else{\
- h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc_rv40 , depth);\
- h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc_rv40 , depth);\
- h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc_rv40 , depth);\
+ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
if (codec_id == CODEC_ID_VP8) {\
h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc , depth);\
h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\
@@ -144,13 +466,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
h->pred16x16[HOR_PRED8x8 ]= FUNCC(pred16x16_horizontal , depth);\
switch(codec_id){\
case CODEC_ID_SVQ3:\
- h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane_svq3 , depth);\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_svq3);\
break;\
case CODEC_ID_RV40:\
- h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane_rv40 , depth);\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_rv40);\
break;\
case CODEC_ID_VP8:\
- h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_tm_vp8 , depth);\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_tm_vp8);\
h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc , depth);\
h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc , depth);\
break;\
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
new file mode 100644
index 0000000000..066e837cdf
--- /dev/null
+++ b/libavcodec/h264pred_template.c
@@ -0,0 +1,975 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "mathops.h"
+#include "high_bit_depth.h"
+
+/* Bit-depth-templated basic 4x4 predictors. All take the source pointer in
+ * bytes and convert stride to pixel units; rows are written as single
+ * pixel4 words (4 pixels at once). */
+
+/* Vertical: replicate the 4-pixel row above into all four rows. */
+static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const pixel4 a= ((pixel4*)(src-stride))[0];
+ ((pixel4*)(src+0*stride))[0]= a;
+ ((pixel4*)(src+1*stride))[0]= a;
+ ((pixel4*)(src+2*stride))[0]= a;
+ ((pixel4*)(src+3*stride))[0]= a;
+}
+
+/* Horizontal: splat each left neighbour across its row. */
+static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ ((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]);
+ ((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]);
+ ((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]);
+ ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]);
+}
+
+/* DC: rounded mean of the 4 top and 4 left neighbours. */
+static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
+ + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
+
+ ((pixel4*)(src+0*stride))[0]=
+ ((pixel4*)(src+1*stride))[0]=
+ ((pixel4*)(src+2*stride))[0]=
+ ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
+}
+
+/* Left-DC: mean of the left edge only (top unavailable). */
+static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
+
+ ((pixel4*)(src+0*stride))[0]=
+ ((pixel4*)(src+1*stride))[0]=
+ ((pixel4*)(src+2*stride))[0]=
+ ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
+}
+
+/* Top-DC: mean of the top edge only (left unavailable). */
+static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
+
+ ((pixel4*)(src+0*stride))[0]=
+ ((pixel4*)(src+1*stride))[0]=
+ ((pixel4*)(src+2*stride))[0]=
+ ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
+}
+
+/* Constant-fill DCs: mid-grey for the given bit depth, and the VP8-specific
+ * mid-grey minus/plus one variants. */
+static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ ((pixel4*)(src+0*stride))[0]=
+ ((pixel4*)(src+1*stride))[0]=
+ ((pixel4*)(src+2*stride))[0]=
+ ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
+}
+
+static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ ((pixel4*)(src+0*stride))[0]=
+ ((pixel4*)(src+1*stride))[0]=
+ ((pixel4*)(src+2*stride))[0]=
+ ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
+}
+
+static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ ((pixel4*)(src+0*stride))[0]=
+ ((pixel4*)(src+1*stride))[0]=
+ ((pixel4*)(src+2*stride))[0]=
+ ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
+}
+
+
+/* Helpers that load the neighbouring edge samples into locals t0..t7 (top /
+ * top-right) and l0..l7 (left / below-left); av_unused silences warnings in
+ * predictors that only read a subset. They expect 'src', 'stride' and (for
+ * the top-right edge) 'topright' to be in scope. */
+#define LOAD_TOP_RIGHT_EDGE\
+ const int av_unused t4= topright[0];\
+ const int av_unused t5= topright[1];\
+ const int av_unused t6= topright[2];\
+ const int av_unused t7= topright[3];\
+
+#define LOAD_DOWN_LEFT_EDGE\
+ const int av_unused l4= src[-1+4*stride];\
+ const int av_unused l5= src[-1+5*stride];\
+ const int av_unused l6= src[-1+6*stride];\
+ const int av_unused l7= src[-1+7*stride];\
+
+#define LOAD_LEFT_EDGE\
+ const int av_unused l0= src[-1+0*stride];\
+ const int av_unused l1= src[-1+1*stride];\
+ const int av_unused l2= src[-1+2*stride];\
+ const int av_unused l3= src[-1+3*stride];\
+
+#define LOAD_TOP_EDGE\
+ const int av_unused t0= src[ 0-1*stride];\
+ const int av_unused t1= src[ 1-1*stride];\
+ const int av_unused t2= src[ 2-1*stride];\
+ const int av_unused t3= src[ 3-1*stride];\
+
+
+/* Down-right diagonal prediction: 3-tap filtered values propagated along the
+ * down-right diagonals, pivoting on the top-left sample (lt). */
+static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const int lt= src[-1-1*stride];
+ LOAD_TOP_EDGE
+ LOAD_LEFT_EDGE
+
+ src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
+ src[0+2*stride]=
+ src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
+ src[0+1*stride]=
+ src[1+2*stride]=
+ src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
+ src[0+0*stride]=
+ src[1+1*stride]=
+ src[2+2*stride]=
+ src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+ src[1+0*stride]=
+ src[2+1*stride]=
+ src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
+ src[2+0*stride]=
+ src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+ src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+}
+
+/* Down-left diagonal prediction: 3-tap filtered top/top-right samples
+ * propagated along the down-left diagonals; the last sample replicates t7. */
+static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
+ pixel *src = (pixel*)_src;
+ const pixel *topright = (const pixel*)_topright;
+ int stride = _stride/sizeof(pixel);
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+// LOAD_LEFT_EDGE
+
+ src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
+ src[1+0*stride]=
+ src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
+ src[2+0*stride]=
+ src[1+1*stride]=
+ src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
+ src[3+0*stride]=
+ src[2+1*stride]=
+ src[1+2*stride]=
+ src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
+ src[3+1*stride]=
+ src[2+2*stride]=
+ src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
+ src[3+2*stride]=
+ src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
+ src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
+}
+
+static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const int lt= src[-1-1*stride];
+ LOAD_TOP_EDGE
+ LOAD_LEFT_EDGE
+
+ src[0+0*stride]=
+ src[1+2*stride]=(lt + t0 + 1)>>1;
+ src[1+0*stride]=
+ src[2+2*stride]=(t0 + t1 + 1)>>1;
+ src[2+0*stride]=
+ src[3+2*stride]=(t1 + t2 + 1)>>1;
+ src[3+0*stride]=(t2 + t3 + 1)>>1;
+ src[0+1*stride]=
+ src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+ src[1+1*stride]=
+ src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
+ src[2+1*stride]=
+ src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+ src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+ src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+ src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+}
+
+static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
+ pixel *src = (pixel*)_src;
+ const pixel *topright = (const pixel*)_topright;
+ int stride = _stride/sizeof(pixel);
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(t0 + t1 + 1)>>1;
+ src[1+0*stride]=
+ src[0+2*stride]=(t1 + t2 + 1)>>1;
+ src[2+0*stride]=
+ src[1+2*stride]=(t2 + t3 + 1)>>1;
+ src[3+0*stride]=
+ src[2+2*stride]=(t3 + t4+ 1)>>1;
+ src[3+2*stride]=(t4 + t5+ 1)>>1;
+ src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+ src[1+1*stride]=
+ src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+ src[2+1*stride]=
+ src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+ src[3+1*stride]=
+ src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+ src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ LOAD_LEFT_EDGE
+
+ src[0+0*stride]=(l0 + l1 + 1)>>1;
+ src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+ src[2+0*stride]=
+ src[0+1*stride]=(l1 + l2 + 1)>>1;
+ src[3+0*stride]=
+ src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+ src[2+1*stride]=
+ src[0+2*stride]=(l2 + l3 + 1)>>1;
+ src[3+1*stride]=
+ src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
+ src[3+2*stride]=
+ src[1+3*stride]=
+ src[0+3*stride]=
+ src[2+2*stride]=
+ src[2+3*stride]=
+ src[3+3*stride]=l3;
+}
+
+static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const int lt= src[-1-1*stride];
+ LOAD_TOP_EDGE
+ LOAD_LEFT_EDGE
+
+ src[0+0*stride]=
+ src[2+1*stride]=(lt + l0 + 1)>>1;
+ src[1+0*stride]=
+ src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
+ src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
+ src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+ src[0+1*stride]=
+ src[2+2*stride]=(l0 + l1 + 1)>>1;
+ src[1+1*stride]=
+ src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+ src[0+2*stride]=
+ src[2+3*stride]=(l1 + l2+ 1)>>1;
+ src[1+2*stride]=
+ src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+ src[0+3*stride]=(l2 + l3 + 1)>>1;
+ src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+}
+
+static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
+ int i;
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const pixel4 a = ((pixel4*)(src-stride))[0];
+ const pixel4 b = ((pixel4*)(src-stride))[1];
+ const pixel4 c = ((pixel4*)(src-stride))[2];
+ const pixel4 d = ((pixel4*)(src-stride))[3];
+
+ for(i=0; i<16; i++){
+ ((pixel4*)(src+i*stride))[0] = a;
+ ((pixel4*)(src+i*stride))[1] = b;
+ ((pixel4*)(src+i*stride))[2] = c;
+ ((pixel4*)(src+i*stride))[3] = d;
+ }
+}
+
+static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
+ int i;
+ pixel *src = (pixel*)_src;
+ stride /= sizeof(pixel);
+
+ for(i=0; i<16; i++){
+ ((pixel4*)(src+i*stride))[0] =
+ ((pixel4*)(src+i*stride))[1] =
+ ((pixel4*)(src+i*stride))[2] =
+ ((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]);
+ }
+}
+
+#define PREDICT_16x16_DC(v)\
+ for(i=0; i<16; i++){\
+ AV_WN4P(src+ 0, v);\
+ AV_WN4P(src+ 4, v);\
+ AV_WN4P(src+ 8, v);\
+ AV_WN4P(src+12, v);\
+ src += stride;\
+ }
+
+static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
+ int i, dc=0;
+ pixel *src = (pixel*)_src;
+ pixel4 dcsplat;
+ stride /= sizeof(pixel);
+
+ for(i=0;i<16; i++){
+ dc+= src[-1+i*stride];
+ }
+
+ for(i=0;i<16; i++){
+ dc+= src[i-stride];
+ }
+
+ dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
+ PREDICT_16x16_DC(dcsplat);
+}
+
+static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
+ int i, dc=0;
+ pixel *src = (pixel*)_src;
+ pixel4 dcsplat;
+ stride /= sizeof(pixel);
+
+ for(i=0;i<16; i++){
+ dc+= src[-1+i*stride];
+ }
+
+ dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
+ PREDICT_16x16_DC(dcsplat);
+}
+
+static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
+ int i, dc=0;
+ pixel *src = (pixel*)_src;
+ pixel4 dcsplat;
+ stride /= sizeof(pixel);
+
+ for(i=0;i<16; i++){
+ dc+= src[i-stride];
+ }
+
+ dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
+ PREDICT_16x16_DC(dcsplat);
+}
+
+#define PRED16x16_X(n, v) \
+static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\
+ int i;\
+ pixel *src = (pixel*)_src;\
+ stride /= sizeof(pixel);\
+ PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
+}
+
+PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1);
+PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0);
+PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1);
+
+static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){
+ int i, j, k;
+ int a;
+ INIT_CLIP
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const pixel * const src0 = src +7-stride;
+ const pixel * src1 = src +8*stride-1;
+ const pixel * src2 = src1-2*stride; // == src+6*stride-1;
+ int H = src0[1] - src0[-1];
+ int V = src1[0] - src2[ 0];
+ for(k=2; k<=8; ++k) {
+ src1 += stride; src2 -= stride;
+ H += k*(src0[k] - src0[-k]);
+ V += k*(src1[0] - src2[ 0]);
+ }
+ if(svq3){
+ H = ( 5*(H/4) ) / 16;
+ V = ( 5*(V/4) ) / 16;
+
+ /* required for 100% accuracy */
+ i = H; H = V; V = i;
+ }else if(rv40){
+ H = ( H + (H>>2) ) >> 4;
+ V = ( V + (V>>2) ) >> 4;
+ }else{
+ H = ( 5*H+32 ) >> 6;
+ V = ( 5*V+32 ) >> 6;
+ }
+
+ a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
+ for(j=16; j>0; --j) {
+ int b = a;
+ a += V;
+ for(i=-16; i<0; i+=4) {
+ src[16+i] = CLIP((b ) >> 5);
+ src[17+i] = CLIP((b+ H) >> 5);
+ src[18+i] = CLIP((b+2*H) >> 5);
+ src[19+i] = CLIP((b+3*H) >> 5);
+ b += 4*H;
+ }
+ src += stride;
+ }
+}
+
+static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
+ FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
+}
+
+static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
+ int i;
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const pixel4 a= ((pixel4*)(src-stride))[0];
+ const pixel4 b= ((pixel4*)(src-stride))[1];
+
+ for(i=0; i<8; i++){
+ ((pixel4*)(src+i*stride))[0]= a;
+ ((pixel4*)(src+i*stride))[1]= b;
+ }
+}
+
+static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
+ int i;
+ pixel *src = (pixel*)_src;
+ stride /= sizeof(pixel);
+
+ for(i=0; i<8; i++){
+ ((pixel4*)(src+i*stride))[0]=
+ ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]);
+ }
+}
+
+#define PRED8x8_X(n, v)\
+static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
+ int i;\
+ pixel *src = (pixel*)_src;\
+ stride /= sizeof(pixel);\
+ for(i=0; i<8; i++){\
+ ((pixel4*)(src+i*stride))[0]=\
+ ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\
+ }\
+}
+
+PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
+PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
+PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);
+
+static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
+ int i;
+ int dc0, dc2;
+ pixel4 dc0splat, dc2splat;
+ pixel *src = (pixel*)_src;
+ stride /= sizeof(pixel);
+
+ dc0=dc2=0;
+ for(i=0;i<4; i++){
+ dc0+= src[-1+i*stride];
+ dc2+= src[-1+(i+4)*stride];
+ }
+ dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
+ dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
+
+ for(i=0; i<4; i++){
+ ((pixel4*)(src+i*stride))[0]=
+ ((pixel4*)(src+i*stride))[1]= dc0splat;
+ }
+ for(i=4; i<8; i++){
+ ((pixel4*)(src+i*stride))[0]=
+ ((pixel4*)(src+i*stride))[1]= dc2splat;
+ }
+}
+
+static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
+ int i;
+ int dc0, dc1;
+ pixel4 dc0splat, dc1splat;
+ pixel *src = (pixel*)_src;
+ stride /= sizeof(pixel);
+
+ dc0=dc1=0;
+ for(i=0;i<4; i++){
+ dc0+= src[i-stride];
+ dc1+= src[4+i-stride];
+ }
+ dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
+ dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+
+ for(i=0; i<4; i++){
+ ((pixel4*)(src+i*stride))[0]= dc0splat;
+ ((pixel4*)(src+i*stride))[1]= dc1splat;
+ }
+ for(i=4; i<8; i++){
+ ((pixel4*)(src+i*stride))[0]= dc0splat;
+ ((pixel4*)(src+i*stride))[1]= dc1splat;
+ }
+}
+
+static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
+ int i;
+ int dc0, dc1, dc2;
+ pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
+ pixel *src = (pixel*)_src;
+ stride /= sizeof(pixel);
+
+ dc0=dc1=dc2=0;
+ for(i=0;i<4; i++){
+ dc0+= src[-1+i*stride] + src[i-stride];
+ dc1+= src[4+i-stride];
+ dc2+= src[-1+(i+4)*stride];
+ }
+ dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
+ dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+ dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
+ dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
+
+ for(i=0; i<4; i++){
+ ((pixel4*)(src+i*stride))[0]= dc0splat;
+ ((pixel4*)(src+i*stride))[1]= dc1splat;
+ }
+ for(i=4; i<8; i++){
+ ((pixel4*)(src+i*stride))[0]= dc2splat;
+ ((pixel4*)(src+i*stride))[1]= dc3splat;
+ }
+}
+
+//the following 4 function should not be optimized!
+static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
+ FUNCC(pred8x8_top_dc)(src, stride);
+ FUNCC(pred4x4_dc)(src, NULL, stride);
+}
+
+static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
+ FUNCC(pred8x8_dc)(src, stride);
+ FUNCC(pred4x4_top_dc)(src, NULL, stride);
+}
+
+static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
+ FUNCC(pred8x8_left_dc)(src, stride);
+ FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
+ FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
+}
+
+static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
+ FUNCC(pred8x8_left_dc)(src, stride);
+ FUNCC(pred4x4_128_dc)(src , NULL, stride);
+ FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
+}
+
+static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
+ int j, k;
+ int a;
+ INIT_CLIP
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ const pixel * const src0 = src +3-stride;
+ const pixel * src1 = src +4*stride-1;
+ const pixel * src2 = src1-2*stride; // == src+2*stride-1;
+ int H = src0[1] - src0[-1];
+ int V = src1[0] - src2[ 0];
+ for(k=2; k<=4; ++k) {
+ src1 += stride; src2 -= stride;
+ H += k*(src0[k] - src0[-k]);
+ V += k*(src1[0] - src2[ 0]);
+ }
+ H = ( 17*H+16 ) >> 5;
+ V = ( 17*V+16 ) >> 5;
+
+ a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
+ for(j=8; j>0; --j) {
+ int b = a;
+ a += V;
+ src[0] = CLIP((b ) >> 5);
+ src[1] = CLIP((b+ H) >> 5);
+ src[2] = CLIP((b+2*H) >> 5);
+ src[3] = CLIP((b+3*H) >> 5);
+ src[4] = CLIP((b+4*H) >> 5);
+ src[5] = CLIP((b+5*H) >> 5);
+ src[6] = CLIP((b+6*H) >> 5);
+ src[7] = CLIP((b+7*H) >> 5);
+ src += stride;
+ }
+}
+
+#define SRC(x,y) src[(x)+(y)*stride]
+#define PL(y) \
+ const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_LEFT \
+ const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
+ + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
+ PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
+ const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
+
+#define PT(x) \
+ const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOP \
+ const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
+ + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
+ PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
+ const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
+ + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
+
+#define PTR(x) \
+ t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOPRIGHT \
+ int t8, t9, t10, t11, t12, t13, t14, t15; \
+ if(has_topright) { \
+ PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
+ t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
+ } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
+
+#define PREDICT_8x8_LOAD_TOPLEFT \
+ const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
+
+#define PREDICT_8x8_DC(v) \
+ int y; \
+ for( y = 0; y < 8; y++ ) { \
+ ((pixel4*)src)[0] = \
+ ((pixel4*)src)[1] = v; \
+ src += stride; \
+ }
+
+static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+
+ PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
+}
+static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+
+ PREDICT_8x8_LOAD_LEFT;
+ const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
+ PREDICT_8x8_DC(dc);
+}
+static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+
+ PREDICT_8x8_LOAD_TOP;
+ const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
+ PREDICT_8x8_DC(dc);
+}
+static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOP;
+ const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
+ +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
+ PREDICT_8x8_DC(dc);
+}
+static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+
+ PREDICT_8x8_LOAD_LEFT;
+#define ROW(y) ((pixel4*)(src+y*stride))[0] =\
+ ((pixel4*)(src+y*stride))[1] = PIXEL_SPLAT_X4(l##y)
+ ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
+#undef ROW
+}
+static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ int y;
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+
+ PREDICT_8x8_LOAD_TOP;
+ src[0] = t0;
+ src[1] = t1;
+ src[2] = t2;
+ src[3] = t3;
+ src[4] = t4;
+ src[5] = t5;
+ src[6] = t6;
+ src[7] = t7;
+ for( y = 1; y < 8; y++ ) {
+ ((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0];
+ ((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1];
+ }
+}
+static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_TOPRIGHT;
+ SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+ SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
+ SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
+ SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
+ SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
+ SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
+ SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
+ SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
+ SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
+ SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOPLEFT;
+ SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
+ SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+ SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
+ SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+ SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
+ SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+ SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
+ SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
+ SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
+ SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOPLEFT;
+ SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
+ SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+ SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
+ SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+ SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
+ SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+ SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
+ SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
+ SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
+ SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
+ SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
+ SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
+ SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
+ SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
+ SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
+ SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+ SRC(7,0)= (t6 + t7 + 1) >> 1;
+}
+static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_LEFT;
+ PREDICT_8x8_LOAD_TOPLEFT;
+ SRC(0,7)= (l6 + l7 + 1) >> 1;
+ SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
+ SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
+ SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
+ SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
+ SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
+ SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
+ SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
+ SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
+ SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
+ SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
+ SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
+ SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
+ SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
+ SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
+ SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
+ SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
+ SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
+ SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
+ SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
+ SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
+ SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ PREDICT_8x8_LOAD_TOP;
+ PREDICT_8x8_LOAD_TOPRIGHT;
+ SRC(0,0)= (t0 + t1 + 1) >> 1;
+ SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
+ SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
+ SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
+ SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
+ SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
+ SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
+ SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
+ SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
+ SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+ SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
+ SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+ SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
+ SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
+ SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
+ SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
+ SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
+ SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
+ SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
+ SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
+ SRC(7,6)= (t10 + t11 + 1) >> 1;
+ SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+{
+ pixel *src = (pixel*)_src;
+ int stride = _stride/sizeof(pixel);
+ PREDICT_8x8_LOAD_LEFT;
+ SRC(0,0)= (l0 + l1 + 1) >> 1;
+ SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
+ SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
+ SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
+ SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
+ SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
+ SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
+ SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
+ SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
+ SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
+ SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
+ SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
+ SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
+ SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
+ SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
+ SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
+ SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
+ SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
+}
+#undef PREDICT_8x8_LOAD_LEFT
+#undef PREDICT_8x8_LOAD_TOP
+#undef PREDICT_8x8_LOAD_TOPLEFT
+#undef PREDICT_8x8_LOAD_TOPRIGHT
+#undef PREDICT_8x8_DC
+#undef PTR
+#undef PT
+#undef PL
+#undef SRC
+
+static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
+ int i;
+ pixel *pix = (pixel*)_pix;
+ const dctcoef *block = (const dctcoef*)_block;
+ stride /= sizeof(pixel);
+ pix -= stride;
+ for(i=0; i<4; i++){
+ pixel v = pix[0];
+ pix[1*stride]= v += block[0];
+ pix[2*stride]= v += block[4];
+ pix[3*stride]= v += block[8];
+ pix[4*stride]= v + block[12];
+ pix++;
+ block++;
+ }
+}
+
+static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
+ int i;
+ pixel *pix = (pixel*)_pix;
+ const dctcoef *block = (const dctcoef*)_block;
+ stride /= sizeof(pixel);
+ for(i=0; i<4; i++){
+ pixel v = pix[-1];
+ pix[0]= v += block[0];
+ pix[1]= v += block[1];
+ pix[2]= v += block[2];
+ pix[3]= v + block[3];
+ pix+= stride;
+ block+= 4;
+ }
+}
+
+static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
+ int i;
+ pixel *pix = (pixel*)_pix;
+ const dctcoef *block = (const dctcoef*)_block;
+ stride /= sizeof(pixel);
+ pix -= stride;
+ for(i=0; i<8; i++){
+ pixel v = pix[0];
+ pix[1*stride]= v += block[0];
+ pix[2*stride]= v += block[8];
+ pix[3*stride]= v += block[16];
+ pix[4*stride]= v += block[24];
+ pix[5*stride]= v += block[32];
+ pix[6*stride]= v += block[40];
+ pix[7*stride]= v += block[48];
+ pix[8*stride]= v + block[56];
+ pix++;
+ block++;
+ }
+}
+
+static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
+ int i;
+ pixel *pix = (pixel*)_pix;
+ const dctcoef *block = (const dctcoef*)_block;
+ stride /= sizeof(pixel);
+ for(i=0; i<8; i++){
+ pixel v = pix[-1];
+ pix[0]= v += block[0];
+ pix[1]= v += block[1];
+ pix[2]= v += block[2];
+ pix[3]= v += block[3];
+ pix[4]= v += block[4];
+ pix[5]= v += block[5];
+ pix[6]= v += block[6];
+ pix[7]= v + block[7];
+ pix+= stride;
+ block+= 8;
+ }
+}
+
+static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<16; i++)
+ FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+}
+
+static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<16; i++)
+ FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+}
+
+static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<4; i++)
+ FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+}
+
+static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<4; i++)
+ FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+}
diff --git a/libavcodec/high_bit_depth.h b/libavcodec/high_bit_depth.h
new file mode 100644
index 0000000000..6f2b6a74f4
--- /dev/null
+++ b/libavcodec/high_bit_depth.h
@@ -0,0 +1,85 @@
+#include "dsputil.h"
+
+#ifndef BIT_DEPTH
+#define BIT_DEPTH 8
+#endif
+
+#ifdef AVCODEC_H264_HIGH_DEPTH_H
+# undef pixel
+# undef pixel2
+# undef pixel4
+# undef dctcoef
+# undef INIT_CLIP
+# undef no_rnd_avg_pixel4
+# undef rnd_avg_pixel4
+# undef AV_RN2P
+# undef AV_RN4P
+# undef AV_WN2P
+# undef AV_WN4P
+# undef AV_WN4PA
+# undef CLIP
+# undef FUNC
+# undef FUNCC
+# undef av_clip_pixel
+# undef PIXEL_SPLAT_X4
+#else
+# define AVCODEC_H264_HIGH_DEPTH_H
+# define CLIP_PIXEL(depth)\
+ static inline uint16_t av_clip_pixel_ ## depth (int p)\
+ {\
+ const int pixel_max = (1 << depth)-1;\
+ return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\
+ }
+
+CLIP_PIXEL( 9)
+CLIP_PIXEL(10)
+#endif
+
+#if BIT_DEPTH > 8
+# define pixel uint16_t
+# define pixel2 uint32_t
+# define pixel4 uint64_t
+# define dctcoef int32_t
+
+# define INIT_CLIP
+# define no_rnd_avg_pixel4 no_rnd_avg64
+# define rnd_avg_pixel4 rnd_avg64
+# define AV_RN2P AV_RN32
+# define AV_RN4P AV_RN64
+# define AV_WN2P AV_WN32
+# define AV_WN4P AV_WN64
+# define AV_WN4PA AV_WN64A
+# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
+#else
+# define pixel uint8_t
+# define pixel2 uint16_t
+# define pixel4 uint32_t
+# define dctcoef int16_t
+
+# define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+# define no_rnd_avg_pixel4 no_rnd_avg32
+# define rnd_avg_pixel4 rnd_avg32
+# define AV_RN2P AV_RN16
+# define AV_RN4P AV_RN32
+# define AV_WN2P AV_WN16
+# define AV_WN4P AV_WN32
+# define AV_WN4PA AV_WN32A
+# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
+#endif
+
+#if BIT_DEPTH == 8
+# define av_clip_pixel(a) av_clip_uint8(a)
+# define CLIP(a) cm[a]
+# define FUNC(a) a ## _8
+# define FUNCC(a) a ## _8_c
+#elif BIT_DEPTH == 9
+# define av_clip_pixel(a) av_clip_pixel_9(a)
+# define CLIP(a) av_clip_pixel_9(a)
+# define FUNC(a) a ## _9
+# define FUNCC(a) a ## _9_c
+#elif BIT_DEPTH == 10
+# define av_clip_pixel(a) av_clip_pixel_10(a)
+# define CLIP(a) av_clip_pixel_10(a)
+# define FUNC(a) a ## _10
+# define FUNCC(a) a ## _10_c
+#endif
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index 6d7671c81f..d041cafe85 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -158,7 +158,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
const int32_t *samples, int blocksize, int min_order,
int max_order, int precision,
int32_t coefs[][MAX_LPC_ORDER], int *shift,
- enum AVLPCType lpc_type, int lpc_passes,
+ enum FFLPCType lpc_type, int lpc_passes,
int omethod, int max_shift, int zero_shift)
{
double autoc[MAX_LPC_ORDER+1];
@@ -168,7 +168,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
int opt_order;
assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER &&
- lpc_type > AV_LPC_TYPE_FIXED);
+ lpc_type > FF_LPC_TYPE_FIXED);
/* reinit LPC context if parameters have changed */
if (blocksize != s->blocksize || max_order != s->max_order ||
@@ -177,7 +177,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
ff_lpc_init(s, blocksize, max_order, lpc_type);
}
- if (lpc_type == AV_LPC_TYPE_LEVINSON) {
+ if (lpc_type == FF_LPC_TYPE_LEVINSON) {
double *windowed_samples = s->windowed_samples + max_order;
s->lpc_apply_welch_window(samples, blocksize, windowed_samples);
@@ -188,7 +188,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
for(i=0; i<max_order; i++)
ref[i] = fabs(lpc[i][i]);
- } else if (lpc_type == AV_LPC_TYPE_CHOLESKY) {
+ } else if (lpc_type == FF_LPC_TYPE_CHOLESKY) {
LLSModel m[2];
double var[MAX_LPC_ORDER+1], av_uninit(weight);
@@ -241,13 +241,13 @@ int ff_lpc_calc_coefs(LPCContext *s,
}
av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order,
- enum AVLPCType lpc_type)
+ enum FFLPCType lpc_type)
{
s->blocksize = blocksize;
s->max_order = max_order;
s->lpc_type = lpc_type;
- if (lpc_type == AV_LPC_TYPE_LEVINSON) {
+ if (lpc_type == FF_LPC_TYPE_LEVINSON) {
s->windowed_samples = av_mallocz((blocksize + max_order + 2) *
sizeof(*s->windowed_samples));
if (!s->windowed_samples)
diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h
index 96b66df909..9db5dbac30 100644
--- a/libavcodec/lpc.h
+++ b/libavcodec/lpc.h
@@ -35,11 +35,22 @@
#define MIN_LPC_ORDER 1
#define MAX_LPC_ORDER 32
+/**
+ * LPC analysis type
+ */
+enum FFLPCType {
+ FF_LPC_TYPE_DEFAULT = -1, ///< use the codec default LPC type
+ FF_LPC_TYPE_NONE = 0, ///< do not use LPC prediction or use all zero coefficients
+ FF_LPC_TYPE_FIXED = 1, ///< fixed LPC coefficients
+ FF_LPC_TYPE_LEVINSON = 2, ///< Levinson-Durbin recursion
+ FF_LPC_TYPE_CHOLESKY = 3, ///< Cholesky factorization
+ FF_LPC_TYPE_NB , ///< Not part of ABI
+};
typedef struct LPCContext {
int blocksize;
int max_order;
- enum AVLPCType lpc_type;
+ enum FFLPCType lpc_type;
double *windowed_samples;
/**
@@ -77,14 +88,14 @@ int ff_lpc_calc_coefs(LPCContext *s,
const int32_t *samples, int blocksize, int min_order,
int max_order, int precision,
int32_t coefs[][MAX_LPC_ORDER], int *shift,
- enum AVLPCType lpc_type, int lpc_passes,
+ enum FFLPCType lpc_type, int lpc_passes,
int omethod, int max_shift, int zero_shift);
/**
* Initialize LPCContext.
*/
int ff_lpc_init(LPCContext *s, int blocksize, int max_order,
- enum AVLPCType lpc_type);
+ enum FFLPCType lpc_type);
void ff_lpc_init_x86(LPCContext *s);
/**
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
index 3b2d693d88..1a18a8a223 100644
--- a/libavcodec/mlib/dsputil_mlib.c
+++ b/libavcodec/mlib/dsputil_mlib.c
@@ -421,13 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data)
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
c->get_pixels = get_pixels_mlib;
c->diff_pixels = diff_pixels_mlib;
c->add_pixels_clamped = add_pixels_clamped_mlib;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_mlib;
c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
diff --git a/libavcodec/options.c b/libavcodec/options.c
index c0eb790b59..de25635161 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -103,7 +103,7 @@ static const AVOption options[]={
{"umh", "umh motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_UMH }, INT_MIN, INT_MAX, V|E, "me_method" },
{"iter", "iter motion estimation", 0, FF_OPT_TYPE_CONST, {.dbl = ME_ITER }, INT_MIN, INT_MAX, V|E, "me_method" },
{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
-{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, {.dbl=0}, INT_MIN, INT_MAX},
+{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, {.dbl = 0}, INT_MIN, INT_MAX},
{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, {.dbl = 12 }, INT_MIN, INT_MAX, V|E},
{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
@@ -224,7 +224,7 @@ static const AVOption options[]={
{"left", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_LEFT }, INT_MIN, INT_MAX, V|E, "pred"},
{"plane", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_PLANE }, INT_MIN, INT_MAX, V|E, "pred"},
{"median", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_PRED_MEDIAN }, INT_MIN, INT_MAX, V|E, "pred"},
-{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, {.dbl=0}, 0, 10, V|E},
+{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E},
{"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, INT_MAX, V|A|S|E|D, "debug"},
{"pict", "picture info", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_PICT_INFO }, INT_MIN, INT_MAX, V|D, "debug"},
{"rc", "rate control", 0, FF_OPT_TYPE_CONST, {.dbl = FF_DEBUG_RC }, INT_MIN, INT_MAX, V|E, "debug"},
@@ -380,12 +380,14 @@ static const AVOption options[]={
{"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_INTRA_VLC }, INT_MIN, INT_MAX, V|E, "flags2"},
{"b_sensitivity", "adjusts sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), FF_OPT_TYPE_INT, {.dbl = 40 }, 1, INT_MAX, V|E},
{"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, {.dbl = FF_COMPRESSION_DEFAULT }, INT_MIN, INT_MAX, V|A|E},
-{"lpc_coeff_precision", "LPC coefficient precision (FLAC)", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, A|E},
{"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
{"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
-{"prediction_order_method", "search method for selecting prediction order", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
-{"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
-{"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+#if FF_API_FLAC_GLOBAL_OPTS
+{"lpc_coeff_precision", "deprecated, use flac-specific options", OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, A|E},
+{"prediction_order_method", "deprecated, use flac-specific options", OFFSET(prediction_order_method), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+{"min_partition_order", "deprecated, use flac-specific options", OFFSET(min_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+{"max_partition_order", "deprecated, use flac-specific options", OFFSET(max_partition_order), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+#endif
{"timecode_frame_start", "GOP timecode frame start number, in non drop frame format", OFFSET(timecode_frame_start), FF_OPT_TYPE_INT64, {.dbl = 0 }, 0, INT64_MAX, V|E},
{"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_DROP_FRAME_TIMECODE }, INT_MIN, INT_MAX, V|E, "flags2"},
{"non_linear_q", "use non linear quantizer", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_NON_LINEAR_QUANT }, INT_MIN, INT_MAX, V|E, "flags2"},
@@ -416,12 +418,14 @@ static const AVOption options[]={
{"intra_refresh", "use periodic insertion of intra blocks instead of keyframes", 0, FF_OPT_TYPE_CONST, {.dbl = CODEC_FLAG2_INTRA_REFRESH }, INT_MIN, INT_MAX, V|E, "flags2"},
{"crf_max", "in crf mode, prevents vbv from lowering quality beyond this point", OFFSET(crf_max), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 51, V|E},
{"log_level_offset", "set the log level offset", OFFSET(log_level_offset), FF_OPT_TYPE_INT, {.dbl = 0 }, INT_MIN, INT_MAX },
-{"lpc_type", "specify LPC algorithm", OFFSET(lpc_type), FF_OPT_TYPE_INT, {.dbl = AV_LPC_TYPE_DEFAULT }, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E},
+#if FF_API_FLAC_GLOBAL_OPTS
+{"lpc_type", "deprecated, use flac-specific options", OFFSET(lpc_type), FF_OPT_TYPE_INT, {.dbl = AV_LPC_TYPE_DEFAULT }, AV_LPC_TYPE_DEFAULT, AV_LPC_TYPE_NB-1, A|E},
{"none", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_NONE }, INT_MIN, INT_MAX, A|E, "lpc_type"},
{"fixed", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_FIXED }, INT_MIN, INT_MAX, A|E, "lpc_type"},
{"levinson", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_LEVINSON }, INT_MIN, INT_MAX, A|E, "lpc_type"},
{"cholesky", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = AV_LPC_TYPE_CHOLESKY }, INT_MIN, INT_MAX, A|E, "lpc_type"},
-{"lpc_passes", "number of passes to use for Cholesky factorization during LPC analysis", OFFSET(lpc_passes), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+{"lpc_passes", "deprecated, use flac-specific options", OFFSET(lpc_passes), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, A|E},
+#endif
{"slices", "number of slices, used in parallelized decoding", OFFSET(slices), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, INT_MAX, V|E},
{"thread_type", "select multithreading type", OFFSET(thread_type), FF_OPT_TYPE_INT, {.dbl = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|E|D, "thread_type"},
{"slice", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_THREAD_SLICE }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index a8d0a61a85..bd432beb87 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1384,7 +1384,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
c->pix_abs[0][1] = sad16_x2_altivec;
c->pix_abs[0][2] = sad16_y2_altivec;
@@ -1399,10 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
c->get_pixels = get_pixels_altivec;
- if (!h264_high_depth)
+ if (!high_bit_depth)
c->clear_block = clear_block_altivec;
c->add_bytes= add_bytes_altivec;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_altivec;
/* the two functions do the same thing, so use the same code */
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 57f30ef703..327fe2c72f 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -153,11 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h)
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
// Common optimizations whether AltiVec is available or not
c->prefetch = prefetch_ppc;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
switch (check_dcbzl_effect()) {
case 32:
c->clear_blocks = clear_blocks_dcbz32_ppc;
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 0e58846f51..9df18888ad 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -965,10 +965,10 @@ H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c
index 4190f9da10..349583f1ba 100644
--- a/libavcodec/ps2/dsputil_mmi.c
+++ b/libavcodec/ps2/dsputil_mmi.c
@@ -142,9 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->clear_blocks = clear_blocks_mmi;
c->put_pixels_tab[1][0] = put_pixels8_mmi;
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index a0912056d7..351ba9e871 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -54,7 +54,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
ractx->lpc_coef[1] = ractx->lpc_tables[1];
ractx->avctx = avctx;
ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
- AV_LPC_TYPE_LEVINSON);
+ FF_LPC_TYPE_LEVINSON);
return ret;
}
@@ -461,7 +461,7 @@ static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame,
32)];
ff_lpc_calc_coefs(&ractx->lpc_ctx, lpc_data, NBLOCKS * BLOCKSIZE, LPC_ORDER,
- LPC_ORDER, 16, lpc_coefs, shift, AV_LPC_TYPE_LEVINSON,
+ LPC_ORDER, 16, lpc_coefs, shift, FF_LPC_TYPE_LEVINSON,
0, ORDER_METHOD_EST, 12, 0);
for (i = 0; i < LPC_ORDER; i++)
block_coefs[NBLOCKS - 1][i] = -(lpc_coefs[LPC_ORDER - 1][i] <<
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 93b663894a..8be9318cdb 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -333,9 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
@@ -405,7 +405,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_qpel, 1, 8);
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
dspfunc(put_h264_qpel, 0, 16);
dspfunc(put_h264_qpel, 1, 8);
dspfunc(put_h264_qpel, 2, 4);
@@ -415,7 +415,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
}
#undef dspfunc
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index 219bb4c353..d254e1db6b 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -92,10 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
dsputil_init_align(c,avctx);
- if (!h264_high_depth)
+ if (!high_bit_depth)
c->clear_blocks = clear_blocks_sh4;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
c->idct_put = idct_put;
diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c
index ba921ad772..e4236602f6 100644
--- a/libavcodec/sparc/dsputil_vis.c
+++ b/libavcodec/sparc/dsputil_vis.c
@@ -3953,7 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
{
/* VIS-specific optimizations */
int accel = vis_level ();
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (accel & ACCEL_SPARC_VIS) {
if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){
@@ -3963,7 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
}
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = MC_put_o_16_vis;
c->put_pixels_tab[0][1] = MC_put_x_16_vis;
c->put_pixels_tab[0][2] = MC_put_y_16_vis;
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index c5d932d399..86a1b4957a 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -1297,9 +1297,9 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op))
unsigned int ff_toupper4(unsigned int x)
{
return toupper( x &0xFF)
- + (toupper((x>>8 )&0xFF)<<8 )
- + (toupper((x>>16)&0xFF)<<16)
- + (toupper((x>>24)&0xFF)<<24);
+ + (toupper((x>>8 )&0xFF)<<8 )
+ + (toupper((x>>16)&0xFF)<<16)
+ + (toupper((x>>24)&0xFF)<<24);
}
#if !HAVE_PTHREADS
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 56d1eac52c..067cf4af89 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -62,5 +62,8 @@
#ifndef FF_API_OLD_FF_PICT_TYPES
#define FF_API_OLD_FF_PICT_TYPES (LIBAVCODEC_VERSION_MAJOR < 54)
#endif
+#ifndef FF_API_FLAC_GLOBAL_OPTS
+#define FF_API_FLAC_GLOBAL_OPTS (LIBAVCODEC_VERSION_MAJOR < 54)
+#endif
#endif /* AVCODEC_VERSION_H */
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 985a15d2f1..1a5413bda4 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2322,7 +2322,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
- const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -2404,7 +2404,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->clear_block = clear_block_mmx;
c->clear_blocks = clear_blocks_mmx;
if ((mm_flags & AV_CPU_FLAG_SSE) &&
@@ -2421,7 +2421,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
SET_HPEL_FUNCS(put, 0, 16, mmx);
SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
SET_HPEL_FUNCS(avg, 0, 16, mmx);
@@ -2436,13 +2436,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->gmc= gmc_mmx;
#endif
#if ARCH_X86_32 && HAVE_YASM
- if (!h264_high_depth)
+ if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_mmx;
#endif
c->add_bytes= add_bytes_mmx;
- if (!h264_high_depth)
+ if (!high_bit_depth)
c->draw_edges = draw_edges_mmx;
if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
@@ -2451,7 +2451,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#if HAVE_YASM
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
}
@@ -2463,7 +2463,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->prefetch = prefetch_mmx2;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
@@ -2480,7 +2480,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
@@ -2529,7 +2529,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
@@ -2547,7 +2547,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
@@ -2564,7 +2564,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
c->prefetch = prefetch_3dnow;
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
@@ -2602,7 +2602,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
@@ -2617,7 +2617,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
#if HAVE_YASM
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
}
@@ -2635,7 +2635,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
@@ -2643,7 +2643,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
if(mm_flags & AV_CPU_FLAG_SSE2){
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
@@ -2660,7 +2660,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#if HAVE_SSSE3
if(mm_flags & AV_CPU_FLAG_SSSE3){
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2675,7 +2675,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 3, ssse3);
}
#if HAVE_YASM
- if (!h264_high_depth) {
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
@@ -2737,7 +2737,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
- if (!h264_high_depth)
+ if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_sse;
c->gmc= gmc_sse;
#endif
diff --git a/libavformat/utils.c b/libavformat/utils.c
index b4c51d135a..7624cb4f67 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -3048,12 +3048,12 @@ static int ff_interleave_compare_dts(AVFormatContext *s, AVPacket *next, AVPacke
{
AVStream *st = s->streams[ pkt ->stream_index];
AVStream *st2= s->streams[ next->stream_index];
- int64_t a= st2->time_base.num * (int64_t)st ->time_base.den;
- int64_t b= st ->time_base.num * (int64_t)st2->time_base.den;
- int64_t dts1 = av_rescale_rnd(pkt->dts, b, a, AV_ROUND_DOWN);
- if (dts1==next->dts && dts1==av_rescale_rnd(pkt->dts, b, a, AV_ROUND_UP))
+ int comp = av_compare_ts(next->dts, st2->time_base, pkt->dts,
+ st->time_base);
+
+ if (comp == 0)
return pkt->stream_index < next->stream_index;
- return dts1 < next->dts;
+ return comp > 0;
}
int av_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out, AVPacket *pkt, int flush){
diff --git a/libavutil/opt.h b/libavutil/opt.h
index 018d8a2b4f..b04c7905d6 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h
@@ -67,6 +67,9 @@ typedef struct AVOption {
union {
double dbl;
const char *str;
+ /* TODO those are unused now */
+ int64_t i64;
+ AVRational q;
} default_val;
double min; ///< minimum valid value for the option
double max; ///< maximum valid value for the option
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 6713da23f6..69d28dd4cc 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -136,7 +136,7 @@ enum PixelFormat {
PIX_FMT_BGR48BE, ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian
PIX_FMT_BGR48LE, ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian
- //the following 4 formats are deprecated and should be replaced by PIX_FMT_YUV420P16* with the bpp stored seperately
+ //the following 6 formats are deprecated and should be replaced by PIX_FMT_YUV420P16* with the bpp stored seperately
PIX_FMT_YUV420P9BE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
PIX_FMT_YUV420P9LE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
PIX_FMT_YUV420P10BE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 2c6269487a..f00a45cf38 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -107,9 +107,13 @@ const char *swscale_license(void)
|| (x)==PIX_FMT_YUV440P \
|| (x)==PIX_FMT_MONOWHITE \
|| (x)==PIX_FMT_MONOBLACK \
+ || (x)==PIX_FMT_YUV420P9LE \
+ || (x)==PIX_FMT_YUV420P10LE \
|| (x)==PIX_FMT_YUV420P16LE \
|| (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \
+ || (x)==PIX_FMT_YUV420P9BE \
+ || (x)==PIX_FMT_YUV420P10BE \
|| (x)==PIX_FMT_YUV420P16BE \
|| (x)==PIX_FMT_YUV422P16BE \
|| (x)==PIX_FMT_YUV444P16BE \
diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak
index 259e1e0ea0..9cb669c1a0 100644
--- a/tests/fate/h264.mak
+++ b/tests/fate/h264.mak
@@ -127,6 +127,13 @@ FATE_H264 = aud_mw_e \
frext-hpcvflnl_bcrm_a \
frext-hpcvmolq_brcm_b \
frext-hpcvnl_brcm_a \
+ frext-pph10i1_panasonic_a \
+ frext-pph10i2_panasonic_a \
+ frext-pph10i3_panasonic_a \
+ frext-pph10i4_panasonic_a \
+ frext-pph10i5_panasonic_a \
+ frext-pph10i6_panasonic_a \
+ frext-pph10i7_panasonic_a \
hcbp2_hhi_a \
hcmp1_hhi_a \
ls_sva_d \
@@ -301,6 +308,13 @@ fate-h264-conformance-frext-hpcvfl_bcrm_a: CMD = framecrc -i $(SAMPLES)/h264-co
fate-h264-conformance-frext-hpcvflnl_bcrm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVFLNL_BRCM_A.264 -vsync 0
fate-h264-conformance-frext-hpcvmolq_brcm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVMOLQ_BRCM_B.264
fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVNL_BRCM_A.264
+fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264
+fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264
+fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264
+fate-h264-conformance-frext-pph10i4_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I4_Panasonic_A.264
+fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264
+fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264
+fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264
fate-h264-conformance-hcbp2_hhi_a: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCBP2_HHI_A.264
fate-h264-conformance-hcmp1_hhi_a: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCMP1_HHI_A.264
fate-h264-conformance-ls_sva_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/LS_SVA_D.264
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i1_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i1_panasonic_a
new file mode 100644
index 0000000000..1cfc313e32
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i1_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 2764800, 0xcc4df07d
+0, 3600, 2764800, 0x85f9e6d4
+0, 7200, 2764800, 0x23ffe90d
+0, 10800, 2764800, 0xf0a6d453
+0, 14400, 2764800, 0x913a6392
+0, 18000, 2764800, 0xcc5f9736
+0, 21600, 2764800, 0x43f9f9ce
+0, 25200, 2764800, 0xc874b44e
+0, 28800, 2764800, 0x83b665e6
+0, 32400, 2764800, 0x5ea2e31e
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i2_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i2_panasonic_a
new file mode 100644
index 0000000000..274bdaf2b1
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i2_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 2764800, 0x4f710132
+0, 3600, 2764800, 0x57e5b713
+0, 7200, 2764800, 0xcca01477
+0, 10800, 2764800, 0xa19a95cd
+0, 14400, 2764800, 0x700a757d
+0, 18000, 2764800, 0xd8c6f60f
+0, 21600, 2764800, 0x95a1bbc7
+0, 25200, 2764800, 0x0582077a
+0, 28800, 2764800, 0x91595f91
+0, 32400, 2764800, 0xf5fe034a
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i3_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i3_panasonic_a
new file mode 100644
index 0000000000..195e45a67b
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i3_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 2764800, 0xda69f69e
+0, 3600, 2764800, 0x29ed832f
+0, 7200, 2764800, 0xb3244cc4
+0, 10800, 2764800, 0xe41a312c
+0, 14400, 2764800, 0xac0b344b
+0, 18000, 2764800, 0xc585aa20
+0, 21600, 2764800, 0x0952054c
+0, 25200, 2764800, 0xd1a02f87
+0, 28800, 2764800, 0xfcbfe87c
+0, 32400, 2764800, 0xe4e9b8a2
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a
new file mode 100644
index 0000000000..d351a7eb1f
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a
@@ -0,0 +1,19 @@
+0, 0, 6220800, 0xca2a2a5e
+0, 3600, 6220800, 0x8009a65e
+0, 7200, 6220800, 0x63e72b3b
+0, 10800, 6220800, 0x7459a1cc
+0, 14400, 6220800, 0x02191aa9
+0, 18000, 6220800, 0x88dca590
+0, 21600, 6220800, 0x56dd150a
+0, 25200, 6220800, 0x5f56a56f
+0, 28800, 6220800, 0x67ada4b7
+0, 32400, 6220800, 0x88dca590
+0, 36000, 6220800, 0xd3b09fe5
+0, 39600, 6220800, 0x2223998c
+0, 43200, 6220800, 0x5e5b2da5
+0, 46800, 6220800, 0x88dca590
+0, 50400, 6220800, 0x5e5b2da5
+0, 54000, 6220800, 0x88dca590
+0, 57600, 6220800, 0x5e5b2da5
+0, 61200, 6220800, 0x88dca590
+0, 64800, 6220800, 0x26e1ec8b
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i5_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i5_panasonic_a
new file mode 100644
index 0000000000..1afbac01e7
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i5_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 6220800, 0x1df58ce9
+0, 3600, 6220800, 0x8f2859ce
+0, 7200, 6220800, 0x229cc7ff
+0, 10800, 6220800, 0x73e86984
+0, 14400, 6220800, 0xb6d4504b
+0, 18000, 6220800, 0x4e7d4883
+0, 21600, 6220800, 0xbec3f0f7
+0, 25200, 6220800, 0x1d9af065
+0, 28800, 6220800, 0x44851549
+0, 32400, 6220800, 0xfcf8728e
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i6_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i6_panasonic_a
new file mode 100644
index 0000000000..6d105466c9
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i6_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 6220800, 0x408daf70
+0, 3600, 6220800, 0x59b254a3
+0, 7200, 6220800, 0x4cf4279c
+0, 10800, 6220800, 0x5c9437ae
+0, 14400, 6220800, 0x986c3eb8
+0, 18000, 6220800, 0x23fd883e
+0, 21600, 6220800, 0x84f222fe
+0, 25200, 6220800, 0xe7f91107
+0, 28800, 6220800, 0xb544b31e
+0, 32400, 6220800, 0x1ebdde56
diff --git a/tests/ref/fate/h264-conformance-frext-pph10i7_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i7_panasonic_a
new file mode 100644
index 0000000000..28825446f9
--- /dev/null
+++ b/tests/ref/fate/h264-conformance-frext-pph10i7_panasonic_a
@@ -0,0 +1,10 @@
+0, 0, 6220800, 0xf81873fe
+0, 3600, 6220800, 0x7b96fbdc
+0, 7200, 6220800, 0x75dbafc4
+0, 10800, 6220800, 0x7524301e
+0, 14400, 6220800, 0x0f3621ab
+0, 18000, 6220800, 0xa5e25b35
+0, 21600, 6220800, 0x063a8116
+0, 25200, 6220800, 0x48ebc8ff
+0, 28800, 6220800, 0x1f635df8
+0, 32400, 6220800, 0xe282c8bd