summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ffmpeg.c | 41
-rw-r--r-- libavcodec/alpha/dsputil_alpha.c | 26
-rw-r--r-- libavcodec/alpha/motion_est_alpha.c | 12
-rw-r--r-- libavcodec/avcodec.h | 5
-rw-r--r-- libavcodec/dsputil.c | 127
-rw-r--r-- libavcodec/dsputil.h | 61
-rw-r--r-- libavcodec/error_resilience.c | 4
-rw-r--r-- libavcodec/h263.c | 170
-rw-r--r-- libavcodec/h263data.h | 4
-rw-r--r-- libavcodec/i386/dsputil_mmx.c | 20
-rw-r--r-- libavcodec/i386/motion_est_mmx.c | 120
-rw-r--r-- libavcodec/motion_est.c | 819
-rw-r--r-- libavcodec/motion_est_template.c | 259
-rw-r--r-- libavcodec/mpeg12.c | 254
-rw-r--r-- libavcodec/mpegvideo.c | 305
-rw-r--r-- libavcodec/mpegvideo.h | 60
-rw-r--r-- libavcodec/ppc/dsputil_altivec.c | 40
-rw-r--r-- libavcodec/ppc/dsputil_altivec.h | 16
-rw-r--r-- libavcodec/ppc/dsputil_ppc.c | 14
-rw-r--r-- libavcodec/ratecontrol.c | 2
-rw-r--r-- tests/ffmpeg.regression.ref | 18
-rwxr-xr-x tests/regression.sh | 2
-rw-r--r-- tests/rotozoom.regression.ref | 18
23 files changed, 1450 insertions, 947 deletions
diff --git a/ffmpeg.c b/ffmpeg.c
index 0724984678..8476a0b03d 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -119,8 +119,10 @@ static int use_obmc = 0;
static int use_aic = 0;
static int use_aiv = 0;
static int use_umv = 0;
+static int use_alt_scan = 0;
static int do_deinterlace = 0;
-static int do_interlace = 0;
+static int do_interlace_dct = 0;
+static int do_interlace_me = 0;
static int workaround_bugs = FF_BUG_AUTODETECT;
static int error_resilience = 2;
static int error_concealment = 3;
@@ -130,6 +132,8 @@ static int use_part = 0;
static int packet_size = 0;
static int error_rate = 0;
static int strict = 0;
+static int top_field_first = -1;
+static int noise_reduction = 0;
static int debug = 0;
static int debug_mv = 0;
extern int loop_input; /* currently a hack */
@@ -635,7 +639,12 @@ static void do_video_out(AVFormatContext *s,
/* better than nothing: use input picture interlaced
settings */
big_picture.interlaced_frame = in_picture->interlaced_frame;
- big_picture.top_field_first = in_picture->top_field_first;
+ if(do_interlace_me || do_interlace_dct){
+ if(top_field_first == -1) /* auto (-1): keep the field order reported by the input picture */
+ big_picture.top_field_first = in_picture->top_field_first;
+ else /* explicit -top value: honour it (0 = bottom first, 1 = top first) instead of forcing 1 */
+ big_picture.top_field_first = top_field_first;
+ }
/* handles sameq here. This is not correct because it may
not be a global option */
@@ -1946,6 +1955,16 @@ static void opt_strict(const char *arg)
strict= atoi(arg);
}
+static void opt_top_field_first(const char *arg)
+{
+ top_field_first= atoi(arg);
+}
+
+static void opt_noise_reduction(const char *arg)
+{
+ noise_reduction= atoi(arg);
+}
+
static void opt_audio_bitrate(const char *arg)
{
audio_bit_rate = atoi(arg) * 1000;
@@ -2373,14 +2392,20 @@ static void opt_output_file(const char *filename)
if(use_part) {
video_enc->flags |= CODEC_FLAG_PART;
}
+ if (use_alt_scan) {
+ video_enc->flags |= CODEC_FLAG_ALT_SCAN;
+ }
if (b_frames) {
video_enc->max_b_frames = b_frames;
video_enc->b_frame_strategy = 0;
video_enc->b_quant_factor = 2.0;
}
- if (do_interlace) {
+ if (do_interlace_dct) {
video_enc->flags |= CODEC_FLAG_INTERLACED_DCT;
}
+ if (do_interlace_me) {
+ video_enc->flags |= CODEC_FLAG_INTERLACED_ME;
+ }
video_enc->qmin = video_qmin;
video_enc->qmax = video_qmax;
video_enc->mb_qmin = video_mb_qmin;
@@ -2430,6 +2455,7 @@ static void opt_output_file(const char *filename)
video_enc->idct_algo = idct_algo;
video_enc->strict_std_compliance = strict;
video_enc->error_rate = error_rate;
+ video_enc->noise_reduction= noise_reduction;
if(packet_size){
video_enc->rtp_mode= 1;
video_enc->rtp_payload_size= packet_size;
@@ -2992,16 +3018,21 @@ const OptionDef options[] = {
{ "passlogfile", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void*)&pass_logfilename}, "select two pass log file name", "file" },
{ "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace},
"deinterlace pictures" },
- { "interlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace},
- "force interlacing support in encoder (MPEG2/MPEG4)" },
+ { "ildct", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace_dct},
+ "force interlaced dct support in encoder (MPEG2/MPEG4)" },
+ { "ilme", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace_me},
+ "force interlacied me support in encoder MPEG2" },
{ "psnr", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_psnr}, "calculate PSNR of compressed frames" },
{ "vstats", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_vstats}, "dump video coding statistics to file" },
{ "vhook", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)add_frame_hooker}, "insert video processing module", "module" },
{ "aic", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_aic}, "enable Advanced intra coding (h263+)" },
{ "aiv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_aiv}, "enable Alternative inter vlc (h263+)" },
{ "umv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_umv}, "enable Unlimited Motion Vector (h263+)" },
+ { "alt", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_alt_scan}, "enable alternate scantable (mpeg2)" },
{ "intra_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_intra_matrix}, "specify intra matrix coeffs", "matrix" },
{ "inter_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_inter_matrix}, "specify inter matrix coeffs", "matrix" },
+ { "top", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" },
+ { "nr", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_noise_reduction}, "noise reduction", "" },
/* audio options */
{ "ab", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_bitrate}, "set audio bitrate (in kbit/s)", "bitrate", },
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 82ff7db668..496f461203 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -39,11 +39,11 @@ void get_pixels_mvi(DCTELEM *restrict block,
const uint8_t *restrict pixels, int line_size);
void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
int stride);
-int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
-int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
-int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
-int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
#if 0
/* These functions were the base for the optimized assembler routines,
@@ -290,11 +290,6 @@ static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
return pix_abs16x16_mvi_asm(a, b, stride);
}
-static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
-{
- return pix_abs8x8_mvi(a, b, stride);
-}
-
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
{
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
@@ -347,12 +342,13 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
c->get_pixels = get_pixels_mvi;
c->diff_pixels = diff_pixels_mvi;
c->sad[0] = sad16x16_mvi;
- c->sad[1] = sad8x8_mvi;
- c->pix_abs8x8 = pix_abs8x8_mvi;
- c->pix_abs16x16 = pix_abs16x16_mvi_asm;
- c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi;
- c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi;
- c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi;
+ c->sad[1] = pix_abs8x8_mvi;
+// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed
+ c->pix_abs[0][0] = sad16x16_mvi;
+ c->pix_abs[1][0] = pix_abs8x8_mvi;
+ c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
+ c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
+ c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
}
put_pixels_clamped_axp_p = c->put_pixels_clamped;
diff --git a/libavcodec/alpha/motion_est_alpha.c b/libavcodec/alpha/motion_est_alpha.c
index 804e1d2b6b..8b8a0a25c5 100644
--- a/libavcodec/alpha/motion_est_alpha.c
+++ b/libavcodec/alpha/motion_est_alpha.c
@@ -84,10 +84,9 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
return r1 + r2;
}
-int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int result = 0;
- int h = 8;
if ((size_t) pix2 & 0x7) {
/* works only when pix2 is actually unaligned */
@@ -160,10 +159,9 @@ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
}
#endif
-int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int result = 0;
- int h = 16;
uint64_t disalign = (size_t) pix2 & 0x7;
switch (disalign) {
@@ -234,10 +232,9 @@ int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
return result;
}
-int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int result = 0;
- int h = 16;
if ((size_t) pix2 & 0x7) {
uint64_t t, p2_l, p2_r;
@@ -288,10 +285,9 @@ int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
return result;
}
-int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int result = 0;
- int h = 16;
uint64_t p1_l, p1_r;
uint64_t p2_l, p2_r, p2_x;
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index ef9950bf43..7d9f8c46ed 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -17,7 +17,7 @@ extern "C" {
#define FFMPEG_VERSION_INT 0x000408
#define FFMPEG_VERSION "0.4.8"
-#define LIBAVCODEC_BUILD 4697
+#define LIBAVCODEC_BUILD 4698
#define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
#define LIBAVCODEC_VERSION FFMPEG_VERSION
@@ -263,7 +263,8 @@ static const __attribute__((unused)) int Motion_Est_QTab[] =
#define CODEC_FLAG_H263P_AIV 0x00000008 ///< H263 Alternative inter vlc
#define CODEC_FLAG_OBMC 0x00000001 ///< OBMC
#define CODEC_FLAG_LOOP_FILTER 0x00000800 ///< loop filter
-#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000
+#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000
+#define CODEC_FLAG_INTERLACED_ME 0x20000000 ///< interlaced motion estimation
/* Unsupported options :
* Syntax Arithmetic coding (SAC)
* Reference Picture Selection
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index e516d7ee99..bc2ef8cf07 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -218,13 +218,13 @@ static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
}
}
-static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
+static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
int s, i;
uint32_t *sq = squareTbl + 256;
s = 0;
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < h; i++) {
s += sq[pix1[0] - pix2[0]];
s += sq[pix1[1] - pix2[1]];
s += sq[pix1[2] - pix2[2]];
@@ -239,13 +239,13 @@ static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
return s;
}
-static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
+static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
uint32_t *sq = squareTbl + 256;
s = 0;
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < h; i++) {
s += sq[pix1[ 0] - pix2[ 0]];
s += sq[pix1[ 1] - pix2[ 1]];
s += sq[pix1[ 2] - pix2[ 2]];
@@ -2331,12 +2331,12 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
}
}
-static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
s = 0;
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - pix2[0]);
s += abs(pix1[1] - pix2[1]);
s += abs(pix1[2] - pix2[2]);
@@ -2359,12 +2359,12 @@ static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
s = 0;
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
@@ -2387,13 +2387,13 @@ static int pix_abs16x16_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
uint8_t *pix3 = pix2 + line_size;
s = 0;
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
@@ -2417,13 +2417,13 @@ static int pix_abs16x16_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
uint8_t *pix3 = pix2 + line_size;
s = 0;
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
@@ -2447,12 +2447,12 @@ static int pix_abs16x16_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
s = 0;
- for(i=0;i<8;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - pix2[0]);
s += abs(pix1[1] - pix2[1]);
s += abs(pix1[2] - pix2[2]);
@@ -2467,12 +2467,12 @@ static inline int pix_abs8x8_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
s = 0;
- for(i=0;i<8;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
@@ -2487,13 +2487,13 @@ static int pix_abs8x8_x2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
uint8_t *pix3 = pix2 + line_size;
s = 0;
- for(i=0;i<8;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
@@ -2509,13 +2509,13 @@ static int pix_abs8x8_y2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
+static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
uint8_t *pix3 = pix2 + line_size;
s = 0;
- for(i=0;i<8;i++) {
+ for(i=0;i<h;i++) {
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
@@ -2531,14 +2531,6 @@ static int pix_abs8x8_xy2_c(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){
- return pix_abs16x16_c(a,b,stride);
-}
-
-static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
- return pix_abs8x8_c(a,b,stride);
-}
-
/**
* permutes an 8x8 block.
* @param block the block which will be permuted according to the given permutation vector
@@ -2641,10 +2633,12 @@ o2= (i1)-(i2);
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
-static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
+static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
int i;
int temp[64];
int sum=0;
+
+ assert(h==8);
for(i=0; i<8; i++){
//FIXME try pointer walks
@@ -2735,11 +2729,13 @@ static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
return sum;
}
-static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
DCTELEM * const temp= (DCTELEM*)aligned_temp;
int sum=0, i;
+
+ assert(h==8);
s->dsp.diff_pixels(temp, src1, src2, stride);
s->dsp.fdct(temp);
@@ -2752,13 +2748,14 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
void simple_idct(DCTELEM *block); //FIXME
-static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
DCTELEM * const temp= (DCTELEM*)aligned_temp;
DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
int sum=0, i;
+ assert(h==8);
s->mb_intra=0;
s->dsp.diff_pixels(temp, src1, src2, stride);
@@ -2775,7 +2772,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
return sum;
}
-static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
+static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
const uint8_t *scantable= s->intra_scantable.permutated;
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
@@ -2787,6 +2784,8 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
uint8_t * length;
uint8_t * last_length;
+ assert(h==8);
+
for(i=0; i<8; i++){
((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
@@ -2847,12 +2846,12 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
s->dsp.idct_add(bak, stride, temp);
- distoration= s->dsp.sse[1](NULL, bak, src1, stride);
+ distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
-static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
+static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
const uint8_t *scantable= s->intra_scantable.permutated;
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
@@ -2861,6 +2860,8 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
const int esc_length= s->ac_esc_length;
uint8_t * length;
uint8_t * last_length;
+
+ assert(h==8);
s->dsp.diff_pixels(temp, src1, src2, stride);
@@ -2910,12 +2911,11 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
return bits;
}
-
-WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
-WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
-WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
-WARPER88_1616(rd8x8_c, rd16x16_c)
-WARPER88_1616(bit8x8_c, bit16x16_c)
+WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
+WARPER8_16_SQ(rd8x8_c, rd16_c)
+WARPER8_16_SQ(bit8x8_c, bit16_c)
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
@@ -2989,18 +2989,16 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->clear_blocks = clear_blocks_c;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
- c->sse[0]= sse16_c;
- c->sse[1]= sse8_c;
/* TODO [0] 16 [1] 8 */
- c->pix_abs16x16 = pix_abs16x16_c;
- c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
- c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
- c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
- c->pix_abs8x8 = pix_abs8x8_c;
- c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
- c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
- c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
+ c->pix_abs[0][0] = pix_abs16_c;
+ c->pix_abs[0][1] = pix_abs16_x2_c;
+ c->pix_abs[0][2] = pix_abs16_y2_c;
+ c->pix_abs[0][3] = pix_abs16_xy2_c;
+ c->pix_abs[1][0] = pix_abs8_c;
+ c->pix_abs[1][1] = pix_abs8_x2_c;
+ c->pix_abs[1][2] = pix_abs8_y2_c;
+ c->pix_abs[1][3] = pix_abs8_xy2_c;
#define dspfunc(PFX, IDX, NUM) \
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
@@ -3097,24 +3095,21 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
- c->hadamard8_diff[0]= hadamard8_diff16_c;
- c->hadamard8_diff[1]= hadamard8_diff_c;
c->hadamard8_abs = hadamard8_abs_c;
-
- c->dct_sad[0]= dct_sad16x16_c;
- c->dct_sad[1]= dct_sad8x8_c;
-
- c->sad[0]= sad16x16_c;
- c->sad[1]= sad8x8_c;
-
- c->quant_psnr[0]= quant_psnr16x16_c;
- c->quant_psnr[1]= quant_psnr8x8_c;
-
- c->rd[0]= rd16x16_c;
- c->rd[1]= rd8x8_c;
- c->bit[0]= bit16x16_c;
- c->bit[1]= bit8x8_c;
+#define SET_CMP_FUNC(name) \
+ c->name[0]= name ## 16_c;\
+ c->name[1]= name ## 8x8_c;
+
+ SET_CMP_FUNC(hadamard8_diff)
+ SET_CMP_FUNC(dct_sad)
+ c->sad[0]= pix_abs16_c;
+ c->sad[1]= pix_abs8_c;
+ c->sse[0]= sse16_c;
+ c->sse[1]= sse8_c;
+ SET_CMP_FUNC(quant_psnr)
+ SET_CMP_FUNC(rd)
+ SET_CMP_FUNC(bit)
c->add_bytes= add_bytes_c;
c->diff_bytes= diff_bytes_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 79b6c59c70..0bd85e19ec 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -110,9 +110,7 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
/* motion estimation */
-typedef int (*op_pixels_abs_func)(uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
-
-typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
/**
@@ -136,19 +134,21 @@ typedef struct DSPContext {
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(uint8_t * pix, int line_size);
int (*pix_norm1)(uint8_t * pix, int line_size);
- me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */
- me_cmp_func sse[2];
- me_cmp_func hadamard8_diff[2];
- me_cmp_func dct_sad[2];
- me_cmp_func quant_psnr[2];
- me_cmp_func bit[2];
- me_cmp_func rd[2];
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
+
+ me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */
+ me_cmp_func sse[4];
+ me_cmp_func hadamard8_diff[4];
+ me_cmp_func dct_sad[4];
+ me_cmp_func quant_psnr[4];
+ me_cmp_func bit[4];
+ me_cmp_func rd[4];
int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
- me_cmp_func me_pre_cmp[11];
- me_cmp_func me_cmp[11];
- me_cmp_func me_sub_cmp[11];
- me_cmp_func mb_cmp[11];
+ me_cmp_func me_pre_cmp[5];
+ me_cmp_func me_cmp[5];
+ me_cmp_func me_sub_cmp[5];
+ me_cmp_func mb_cmp[5];
/* maybe create an array for 16/8/4/2 functions */
/**
@@ -226,14 +226,7 @@ typedef struct DSPContext {
qpel_mc_func put_h264_qpel_pixels_tab[3][16];
qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
- op_pixels_abs_func pix_abs16x16;
- op_pixels_abs_func pix_abs16x16_x2;
- op_pixels_abs_func pix_abs16x16_y2;
- op_pixels_abs_func pix_abs16x16_xy2;
- op_pixels_abs_func pix_abs8x8;
- op_pixels_abs_func pix_abs8x8_x2;
- op_pixels_abs_func pix_abs8x8_y2;
- op_pixels_abs_func pix_abs8x8_xy2;
+ me_cmp_func pix_abs[2][4];
/* huffyuv specific */
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
@@ -484,12 +477,24 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
const FFTSample *input, FFTSample *tmp);
void ff_mdct_end(MDCTContext *s);
-#define WARPER88_1616(name8, name16)\
-static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
- return name8(s, dst , src , stride)\
- +name8(s, dst+8 , src+8 , stride)\
- +name8(s, dst +8*stride, src +8*stride, stride)\
- +name8(s, dst+8+8*stride, src+8+8*stride, stride);\
+#define WARPER8_16(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+ return name8(s, dst , src , stride, h)\
+ +name8(s, dst+8 , src+8 , stride, h);\
+}
+
+#define WARPER8_16_SQ(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+ int score=0;\
+ score +=name8(s, dst , src , stride, 8);\
+ score +=name8(s, dst+8 , src+8 , stride, 8);\
+ if(h==16){\
+ dst += 8*stride;\
+ src += 8*stride;\
+ score +=name8(s, dst , src , stride, 8);\
+ score +=name8(s, dst+8 , src+8 , stride, 8);\
+ }\
+ return score;\
}
#ifndef HAVE_LRINTF
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index fd39926b01..ee3b2816af 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -582,8 +582,8 @@ static int is_intra_more_likely(MpegEncContext *s){
uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize;
- is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize);
- is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
+ is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16);
+ is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
}else{
if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
is_intra_likely++;
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index 916cb764a5..f2ab381c1f 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -479,9 +479,9 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
for(i=1; i<s->mb_num; i++){
int mb_xy= s->mb_index2xy[i];
- if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_INTER4V)){
- s->mb_type[mb_xy]&= ~MB_TYPE_INTER4V;
- s->mb_type[mb_xy]|= MB_TYPE_INTER;
+ if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
+ s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V;
+ s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
}
}
@@ -508,9 +508,9 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
for(i=1; i<s->mb_num; i++){
int mb_xy= s->mb_index2xy[i];
- if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_DIRECT)){
- s->mb_type[mb_xy]&= ~MB_TYPE_DIRECT;
- s->mb_type[mb_xy]|= MB_TYPE_BIDIR;
+ if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){
+ s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT;
+ s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR;
}
}
}
@@ -523,7 +523,7 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
*/
int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
const int mb_index= s->mb_x + s->mb_y*s->mb_stride;
- const int colocated_mb_type= s->next_picture.mb_type[mb_index]; //FIXME or next?
+ const int colocated_mb_type= s->next_picture.mb_type[mb_index];
int xy= s->block_index[0];
uint16_t time_pp= s->pp_time;
uint16_t time_pb= s->pb_time;
@@ -547,18 +547,18 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
s->mv_type = MV_TYPE_FIELD;
for(i=0; i<2; i++){
if(s->top_field_first){
- time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
- time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
+ time_pp= s->pp_field_time - s->p_field_select_table[i][mb_index] + i;
+ time_pb= s->pb_field_time - s->p_field_select_table[i][mb_index] + i;
}else{
- time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
- time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
+ time_pp= s->pp_field_time + s->p_field_select_table[i][mb_index] - i;
+ time_pb= s->pb_field_time + s->p_field_select_table[i][mb_index] - i;
}
- s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
- s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
- s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
- : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
- s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1]
- : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
+ s->mv[0][i][0] = s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx;
+ s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my;
+ s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0]
+ : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp;
+ s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1]
+ : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp;
}
return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED;
}else{
@@ -598,9 +598,9 @@ void ff_h263_update_motion_val(MpegEncContext * s){
motion_y = s->mv[0][0][1] + s->mv[0][1][1];
motion_x = (motion_x>>1) | (motion_x&1);
for(i=0; i<2; i++){
- s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0];
- s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1];
- s->field_select_table[mb_xy][i]= s->field_select[0][i];
+ s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0];
+ s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1];
+ s->p_field_select_table[i][mb_xy]= s->field_select[0][i];
}
}
@@ -744,12 +744,14 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(s->pict_type==B_TYPE){
static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
int mb_type= mb_type_table[s->mv_dir];
-
+
if(s->mb_x==0){
- s->last_mv[0][0][0]=
- s->last_mv[0][0][1]=
- s->last_mv[1][0][0]=
- s->last_mv[1][0][1]= 0;
+ for(i=0; i<2; i++){
+ s->last_mv[i][0][0]=
+ s->last_mv[i][0][1]=
+ s->last_mv[i][1][0]=
+ s->last_mv[i][1][1]= 0;
+ }
}
assert(s->dquant>=-2 && s->dquant<=2);
@@ -803,50 +805,64 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(cbp)
put_bits(&s->pb, 1, s->interlaced_dct);
if(mb_type) // not diect mode
- put_bits(&s->pb, 1, 0); // no interlaced ME yet
+ put_bits(&s->pb, 1, s->mv_type == MV_TYPE_FIELD);
}
if(interleaved_stats){
s->misc_bits+= get_bits_diff(s);
}
- switch(mb_type)
- {
- case 0: /* direct */
+ if(mb_type == 0){
+ assert(s->mv_dir & MV_DIRECT);
h263_encode_motion(s, motion_x, 1);
h263_encode_motion(s, motion_y, 1);
s->b_count++;
s->f_count++;
- break;
- case 1: /* bidir */
- h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
- h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
- h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
- h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
- s->last_mv[0][0][0]= s->mv[0][0][0];
- s->last_mv[0][0][1]= s->mv[0][0][1];
- s->last_mv[1][0][0]= s->mv[1][0][0];
- s->last_mv[1][0][1]= s->mv[1][0][1];
- s->b_count++;
- s->f_count++;
- break;
- case 2: /* backward */
- h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
- h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
- s->last_mv[1][0][0]= motion_x;
- s->last_mv[1][0][1]= motion_y;
- s->b_count++;
- break;
- case 3: /* forward */
- h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
- h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
- s->last_mv[0][0][0]= motion_x;
- s->last_mv[0][0][1]= motion_y;
- s->f_count++;
- break;
- default:
- av_log(s->avctx, AV_LOG_ERROR, "unknown mb type\n");
- return;
+ }else{
+ assert(mb_type > 0 && mb_type < 4);
+ if(s->mv_type != MV_TYPE_FIELD){
+ if(s->mv_dir & MV_DIR_FORWARD){
+ h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
+ h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+ s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0];
+ s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1];
+ s->f_count++;
+ }
+ if(s->mv_dir & MV_DIR_BACKWARD){
+ h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
+ h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+ s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0];
+ s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1];
+ s->b_count++;
+ }
+ }else{
+ if(s->mv_dir & MV_DIR_FORWARD){
+ put_bits(&s->pb, 1, s->field_select[0][0]);
+ put_bits(&s->pb, 1, s->field_select[0][1]);
+ }
+ if(s->mv_dir & MV_DIR_BACKWARD){
+ put_bits(&s->pb, 1, s->field_select[1][0]);
+ put_bits(&s->pb, 1, s->field_select[1][1]);
+ }
+ if(s->mv_dir & MV_DIR_FORWARD){
+ for(i=0; i<2; i++){
+ h263_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code);
+ h263_encode_motion(s, s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code);
+ s->last_mv[0][i][0]= s->mv[0][i][0];
+ s->last_mv[0][i][1]= s->mv[0][i][1]*2;
+ }
+ s->f_count++;
+ }
+ if(s->mv_dir & MV_DIR_BACKWARD){
+ for(i=0; i<2; i++){
+ h263_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0] , s->b_code);
+ h263_encode_motion(s, s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code);
+ s->last_mv[1][i][0]= s->mv[1][i][0];
+ s->last_mv[1][i][1]= s->mv[1][i][1]*2;
+ }
+ s->b_count++;
+ }
+ }
}
if(interleaved_stats){
@@ -861,6 +877,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(interleaved_stats){
s->p_tex_bits+= get_bits_diff(s);
}
+
}else{ /* s->pict_type==B_TYPE */
cbp= get_p_cbp(s, block, motion_x, motion_y);
@@ -889,7 +906,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(pic==NULL || pic->pict_type!=B_TYPE) break;
b_pic= pic->data[0] + offset + 16; //FIXME +16
- diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize);
+ diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
if(diff>s->qscale*70){ //FIXME check that 70 is optimal
s->mb_skiped=0;
break;
@@ -929,7 +946,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(!s->progressive_sequence){
if(cbp)
put_bits(pb2, 1, s->interlaced_dct);
- put_bits(pb2, 1, 0); // no interlaced ME yet
+ put_bits(pb2, 1, 0);
}
if(interleaved_stats){
@@ -941,7 +958,38 @@ void mpeg4_encode_mb(MpegEncContext * s,
h263_encode_motion(s, motion_x - pred_x, s->f_code);
h263_encode_motion(s, motion_y - pred_y, s->f_code);
+ }else if(s->mv_type==MV_TYPE_FIELD){
+ if(s->dquant) cbpc+= 8;
+ put_bits(&s->pb,
+ inter_MCBPC_bits[cbpc],
+ inter_MCBPC_code[cbpc]);
+
+ put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+ if(s->dquant)
+ put_bits(pb2, 2, dquant_code[s->dquant+2]);
+
+ assert(!s->progressive_sequence);
+ if(cbp)
+ put_bits(pb2, 1, s->interlaced_dct);
+ put_bits(pb2, 1, 1);
+
+ if(interleaved_stats){
+ s->misc_bits+= get_bits_diff(s);
+ }
+
+ /* motion vectors: 16x8 interlaced mode */
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+ pred_y /=2;
+
+ put_bits(&s->pb, 1, s->field_select[0][0]);
+ put_bits(&s->pb, 1, s->field_select[0][1]);
+
+ h263_encode_motion(s, s->mv[0][0][0] - pred_x, s->f_code);
+ h263_encode_motion(s, s->mv[0][0][1] - pred_y, s->f_code);
+ h263_encode_motion(s, s->mv[0][1][0] - pred_x, s->f_code);
+ h263_encode_motion(s, s->mv[0][1][1] - pred_y, s->f_code);
}else{
+ assert(s->mv_type==MV_TYPE_8X8);
put_bits(&s->pb,
inter_MCBPC_bits[cbpc+16],
inter_MCBPC_code[cbpc+16]);
diff --git a/libavcodec/h263data.h b/libavcodec/h263data.h
index 25435adb59..4da105ffc5 100644
--- a/libavcodec/h263data.h
+++ b/libavcodec/h263data.h
@@ -61,8 +61,8 @@ static const int h263_mb_type_b_map[15]= {
MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_16x16,
MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16,
0, //stuffing
- MB_TYPE_INTRA | MB_TYPE_CBP,
- MB_TYPE_INTRA | MB_TYPE_CBP | MB_TYPE_QUANT,
+ MB_TYPE_INTRA4x4 | MB_TYPE_CBP,
+ MB_TYPE_INTRA4x4 | MB_TYPE_CBP | MB_TYPE_QUANT,
};
const uint8_t cbpc_b_tab[4][2] = {
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index faafca223f..341aa0a235 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -687,10 +687,10 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) {
return tmp;
}
-static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) {
+static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
asm volatile (
- "movl $16,%%ecx\n"
+ "movl %4,%%ecx\n"
"pxor %%mm0,%%mm0\n" /* mm0 = 0 */
"pxor %%mm7,%%mm7\n" /* mm7 holds the sum */
"1:\n"
@@ -741,7 +741,9 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) {
"psrlq $32, %%mm7\n" /* shift hi dword to lo */
"paddd %%mm7,%%mm1\n"
"movd %%mm1,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp) : "r" (line_size) : "%ecx");
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "r" (line_size) , "m" (h)
+ : "%ecx");
return tmp;
}
@@ -866,9 +868,11 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
"movq "#c", "#o"+32(%1) \n\t"\
"movq "#d", "#o"+48(%1) \n\t"\
-static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){
+static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
uint64_t temp[16] __align8;
int sum=0;
+
+ assert(h==8);
diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -951,9 +955,11 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride)
return sum&0xFFFF;
}
-static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride){
+static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
uint64_t temp[16] __align8;
int sum=0;
+
+ assert(h==8);
diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -1037,8 +1043,8 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
}
-WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
-WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
+WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
+WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
#endif //CONFIG_ENCODERS
#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/i386/motion_est_mmx.c
index d71453a4bd..c36d081b1b 100644
--- a/libavcodec/i386/motion_est_mmx.c
+++ b/libavcodec/i386/motion_est_mmx.c
@@ -28,9 +28,9 @@ static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
static __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL;
-static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -64,9 +64,9 @@ static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
);
}
-static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -88,7 +88,7 @@ static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -114,7 +114,7 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ //FIXME reuse src
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"movq "MANGLE(bone)", %%mm5 \n\t"
@@ -151,7 +151,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -189,7 +189,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- int len= -(stride<<h);
+ int len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
@@ -265,85 +265,69 @@ static inline int sum_mmx2(void)
#define PIX_SAD(suf)\
-static int pix_abs8x8_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
- sad8_ ## suf(blk1, blk2, stride, 3);\
+ sad8_1_ ## suf(blk1, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
-static int sad8x8_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
-{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
- "pxor %%mm6, %%mm6 \n\t":);\
-\
- sad8_ ## suf(blk1, blk2, stride, 3);\
-\
- return sum_ ## suf();\
-}\
-\
-static int pix_abs8x8_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 3);\
+ sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
\
-static int pix_abs8x8_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 3);\
+ sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
\
-static int pix_abs8x8_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
+ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[2]) \
);\
\
- sad8_4_ ## suf(blk1, blk2, stride, 3);\
+ sad8_4_ ## suf(blk1, blk2, stride, 8);\
\
return sum_ ## suf();\
}\
\
-static int pix_abs16x16_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
-{\
- asm volatile("pxor %%mm7, %%mm7 \n\t"\
- "pxor %%mm6, %%mm6 \n\t":);\
-\
- sad8_ ## suf(blk1 , blk2 , stride, 4);\
- sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
-\
- return sum_ ## suf();\
-}\
-static int sad16x16_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
- sad8_ ## suf(blk1 , blk2 , stride, 4);\
- sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
+ sad8_1_ ## suf(blk1 , blk2 , stride, h);\
+ sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
-static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
@@ -351,12 +335,12 @@ static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, 4);\
- sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, 4);\
+ sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, h);\
+ sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
-static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
@@ -364,12 +348,12 @@ static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
:: "m"(round_tab[1]) \
);\
\
- sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, 4);\
- sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, 4);\
+ sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, h);\
+ sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
-static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
+static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
@@ -377,8 +361,8 @@ static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
:: "m"(round_tab[2]) \
);\
\
- sad8_4_ ## suf(blk1 , blk2 , stride, 4);\
- sad8_4_ ## suf(blk1+8, blk2+8, stride, 4);\
+ sad8_4_ ## suf(blk1 , blk2 , stride, h);\
+ sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
\
return sum_ ## suf();\
}\
@@ -389,32 +373,32 @@ PIX_SAD(mmx2)
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
{
if (mm_flags & MM_MMX) {
- c->pix_abs16x16 = pix_abs16x16_mmx;
- c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
- c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
- c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
- c->pix_abs8x8 = pix_abs8x8_mmx;
- c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
- c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
- c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
+ c->pix_abs[0][0] = sad16_mmx;
+ c->pix_abs[0][1] = sad16_x2_mmx;
+ c->pix_abs[0][2] = sad16_y2_mmx;
+ c->pix_abs[0][3] = sad16_xy2_mmx;
+ c->pix_abs[1][0] = sad8_mmx;
+ c->pix_abs[1][1] = sad8_x2_mmx;
+ c->pix_abs[1][2] = sad8_y2_mmx;
+ c->pix_abs[1][3] = sad8_xy2_mmx;
- c->sad[0]= sad16x16_mmx;
- c->sad[1]= sad8x8_mmx;
+ c->sad[0]= sad16_mmx;
+ c->sad[1]= sad8_mmx;
}
if (mm_flags & MM_MMXEXT) {
- c->pix_abs16x16 = pix_abs16x16_mmx2;
- c->pix_abs8x8 = pix_abs8x8_mmx2;
+ c->pix_abs[0][0] = sad16_mmx2;
+ c->pix_abs[1][0] = sad8_mmx2;
- c->sad[0]= sad16x16_mmx2;
- c->sad[1]= sad8x8_mmx2;
+ c->sad[0]= sad16_mmx2;
+ c->sad[1]= sad8_mmx2;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
- c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
- c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
- c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
- c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
- c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
+ c->pix_abs[0][1] = sad16_x2_mmx2;
+ c->pix_abs[0][2] = sad16_y2_mmx2;
+ c->pix_abs[0][3] = sad16_xy2_mmx2;
+ c->pix_abs[1][1] = sad8_x2_mmx2;
+ c->pix_abs[1][2] = sad8_y2_mmx2;
+ c->pix_abs[1][3] = sad8_xy2_mmx2;
}
}
}
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 1a449cedd0..cfdbea9c0c 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -46,9 +46,9 @@
static inline int sad_hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, Picture *picture,
- int n, int size, uint8_t * const mv_penalty);
+ int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[6], int stride, int uvstride,
+ int size, int h, uint8_t * const mv_penalty);
static inline int update_map_generation(MpegEncContext * s)
{
@@ -78,20 +78,21 @@ static int minima_cmp(const void *a, const void *b){
#define RENAME(a) simple_ ## a
#define CMP(d, x, y, size)\
-d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
const int dxy= (dx) + 2*(dy);\
- hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
- d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+ hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
}
+
#define CMP_QPEL(d, dx, dy, x, y, size)\
{\
const int dxy= (dx) + 4*(dy);\
qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
- d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
}
#include "motion_est_template.c"
@@ -105,29 +106,29 @@ d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);
#define RENAME(a) simple_chroma_ ## a
#define CMP(d, x, y, size)\
-d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);\
if(chroma_cmp){\
int dxy= ((x)&1) + 2*((y)&1);\
int c= ((x)>>1) + ((y)>>1)*uvstride;\
\
- chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
- d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\
- chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
- d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\
+ chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
+ d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride, h>>1);\
+ chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
+ d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride, h>>1);\
}
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
const int dxy= (dx) + 2*(dy);\
- hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
- d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+ hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
if(chroma_cmp_sub){\
int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
int c= ((x)>>1) + ((y)>>1)*uvstride;\
- chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
- d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
- chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
- d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
}\
}
@@ -135,7 +136,7 @@ if(chroma_cmp){\
{\
const int dxy= (dx) + 4*(dy);\
qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
- d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
if(chroma_cmp_sub){\
int cxy, c;\
int cx= (4*(x) + (dx))/2;\
@@ -144,10 +145,10 @@ if(chroma_cmp){\
cy= (cy>>1)|(cy&1);\
cxy= (cx&1) + 2*(cy&1);\
c= ((cx)>>1) + ((cy)>>1)*uvstride;\
- chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
- d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
- chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
- d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
}\
}
@@ -178,7 +179,7 @@ if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*yma
\
uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
- hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\
+ hpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 8);\
}\
}else{\
int fx = s->me.direct_basis_mv[0][0] + hx;\
@@ -198,9 +199,9 @@ if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*yma
assert((by>>1) + 16*s->mb_y <= s->height);\
\
hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
- hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\
+ hpel_avg[0][bxy](s->me.scratchpad, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 16);\
}\
- d = cmp_func(s, s->me.scratchpad, src_y, stride);\
+ d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
}else\
d= 256*256*256*32;
@@ -238,7 +239,7 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma
\
uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
- qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
+ qpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>2) + (by>>2)*(stride), stride);\
}\
}else{\
int fx = s->me.direct_basis_mv[0][0] + qx;\
@@ -252,12 +253,12 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma
qpel_put[1][fxy](s->me.scratchpad + 8 , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 , stride);\
qpel_put[1][fxy](s->me.scratchpad + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8*stride, stride);\
qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\
- qpel_avg[1][bxy](s->me.scratchpad , (ref2_y) + (bx>>2) + (by>>2)*(stride) , stride);\
- qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\
- qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\
- qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\
+ qpel_avg[1][bxy](s->me.scratchpad , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) , stride);\
+ qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\
+ qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\
+ qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\
}\
- d = cmp_func(s, s->me.scratchpad, src_y, stride);\
+ d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
}else\
d= 256*256*256*32;
@@ -277,7 +278,7 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma
#undef CMP__DIRECT
-static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
return 0;
}
@@ -285,44 +286,37 @@ static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
DSPContext* c= &s->dsp;
int i;
- memset(cmp, 0, sizeof(void*)*11);
-
- switch(type&0xFF){
- case FF_CMP_SAD:
- cmp[0]= c->sad[0];
- cmp[1]= c->sad[1];
- break;
- case FF_CMP_SATD:
- cmp[0]= c->hadamard8_diff[0];
- cmp[1]= c->hadamard8_diff[1];
- break;
- case FF_CMP_SSE:
- cmp[0]= c->sse[0];
- cmp[1]= c->sse[1];
- break;
- case FF_CMP_DCT:
- cmp[0]= c->dct_sad[0];
- cmp[1]= c->dct_sad[1];
- break;
- case FF_CMP_PSNR:
- cmp[0]= c->quant_psnr[0];
- cmp[1]= c->quant_psnr[1];
- break;
- case FF_CMP_BIT:
- cmp[0]= c->bit[0];
- cmp[1]= c->bit[1];
- break;
- case FF_CMP_RD:
- cmp[0]= c->rd[0];
- cmp[1]= c->rd[1];
- break;
- case FF_CMP_ZERO:
- for(i=0; i<7; i++){
+ memset(cmp, 0, sizeof(void*)*5);
+
+ for(i=0; i<4; i++){
+ switch(type&0xFF){
+ case FF_CMP_SAD:
+ cmp[i]= c->sad[i];
+ break;
+ case FF_CMP_SATD:
+ cmp[i]= c->hadamard8_diff[i];
+ break;
+ case FF_CMP_SSE:
+ cmp[i]= c->sse[i];
+ break;
+ case FF_CMP_DCT:
+ cmp[i]= c->dct_sad[i];
+ break;
+ case FF_CMP_PSNR:
+ cmp[i]= c->quant_psnr[i];
+ break;
+ case FF_CMP_BIT:
+ cmp[i]= c->bit[i];
+ break;
+ case FF_CMP_RD:
+ cmp[i]= c->rd[i];
+ break;
+ case FF_CMP_ZERO:
cmp[i]= zero_cmp;
+ break;
+ default:
+ av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
}
- break;
- default:
- av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
}
}
@@ -362,7 +356,7 @@ void ff_init_me(MpegEncContext *s){
else if( s->avctx->me_sub_cmp == FF_CMP_SAD
&& s->avctx-> me_cmp == FF_CMP_SAD
&& s->avctx-> mb_cmp == FF_CMP_SAD)
- s->me.sub_motion_search= sad_hpel_motion_search;
+ s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
else
s->me.sub_motion_search= simple_hpel_motion_search;
}
@@ -370,9 +364,11 @@ void ff_init_me(MpegEncContext *s){
if(s->avctx->me_cmp&FF_CMP_CHROMA){
s->me.motion_search[0]= simple_chroma_epzs_motion_search;
s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
+ s->me.motion_search[4]= simple_chroma_epzs_motion_search2;
}else{
s->me.motion_search[0]= simple_epzs_motion_search;
s->me.motion_search[1]= simple_epzs_motion_search4;
+ s->me.motion_search[4]= simple_epzs_motion_search2;
}
if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){
@@ -453,8 +449,8 @@ static int full_motion_search(MpegEncContext * s,
my = 0;
for (y = y1; y <= y2; y++) {
for (x = x1; x <= x2; x++) {
- d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
- s->linesize);
+ d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
+ s->linesize, 16);
if (d < dmin ||
(d == dmin &&
(abs(x - xx) + abs(y - yy)) <
@@ -518,7 +514,7 @@ static int log_motion_search(MpegEncContext * s,
do {
for (y = y1; y <= y2; y += range) {
for (x = x1; x <= x2; x += range) {
- d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
+ d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
@@ -598,7 +594,7 @@ static int phods_motion_search(MpegEncContext * s,
lastx = x;
for (x = x1; x <= x2; x += range) {
- d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
+ d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminx = d;
mx = x;
@@ -607,7 +603,7 @@ static int phods_motion_search(MpegEncContext * s,
x = lastx;
for (y = y1; y <= y2; y += range) {
- d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
+ d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminy = d;
my = y;
@@ -651,35 +647,25 @@ static int phods_motion_search(MpegEncContext * s,
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
- d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
+ d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
static inline int sad_hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, Picture *picture,
- int n, int size, uint8_t * const mv_penalty)
+ int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[6], int stride, int uvstride,
+ int size, int h, uint8_t * const mv_penalty)
{
- uint8_t *ref_picture= picture->data[0];
uint32_t *score_map= s->me.score_map;
const int penalty_factor= s->me.sub_penalty_factor;
- int mx, my, xx, yy, dminh;
+ int mx, my, dminh;
uint8_t *pix, *ptr;
- op_pixels_abs_func pix_abs_x2;
- op_pixels_abs_func pix_abs_y2;
- op_pixels_abs_func pix_abs_xy2;
-
- if(size==0){
- pix_abs_x2 = s->dsp.pix_abs16x16_x2;
- pix_abs_y2 = s->dsp.pix_abs16x16_y2;
- pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
- }else{
- pix_abs_x2 = s->dsp.pix_abs8x8_x2;
- pix_abs_y2 = s->dsp.pix_abs8x8_y2;
- pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
- }
+ const int xmin= s->me.xmin;
+ const int ymin= s->me.ymin;
+ const int xmax= s->me.xmax;
+ const int ymax= s->me.ymax;
if(s->me.skip){
// printf("S");
@@ -689,13 +675,11 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
}
// printf("N");
- xx = 16 * s->mb_x + 8*(n&1);
- yy = 16 * s->mb_y + 8*(n>>1);
- pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
+ pix = src_data[0];
mx = *mx_ptr;
my = *my_ptr;
- ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
+ ptr = ref_data[0] + (my * stride) + mx;
dminh = dmin;
@@ -715,16 +699,16 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
pen_x= pred_x + mx;
pen_y= pred_y + my;
- ptr-= s->linesize;
+ ptr-= stride;
if(t<=b){
CHECK_SAD_HALF_MV(y2 , 0, -1)
if(l<=r){
CHECK_SAD_HALF_MV(xy2, -1, -1)
if(t+r<=b+l){
CHECK_SAD_HALF_MV(xy2, +1, -1)
- ptr+= s->linesize;
+ ptr+= stride;
}else{
- ptr+= s->linesize;
+ ptr+= stride;
CHECK_SAD_HALF_MV(xy2, -1, +1)
}
CHECK_SAD_HALF_MV(x2 , -1, 0)
@@ -732,9 +716,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
CHECK_SAD_HALF_MV(xy2, +1, -1)
if(t+l<=b+r){
CHECK_SAD_HALF_MV(xy2, -1, -1)
- ptr+= s->linesize;
+ ptr+= stride;
}else{
- ptr+= s->linesize;
+ ptr+= stride;
CHECK_SAD_HALF_MV(xy2, +1, +1)
}
CHECK_SAD_HALF_MV(x2 , +1, 0)
@@ -743,9 +727,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
if(l<=r){
if(t+l<=b+r){
CHECK_SAD_HALF_MV(xy2, -1, -1)
- ptr+= s->linesize;
+ ptr+= stride;
}else{
- ptr+= s->linesize;
+ ptr+= stride;
CHECK_SAD_HALF_MV(xy2, +1, +1)
}
CHECK_SAD_HALF_MV(x2 , -1, 0)
@@ -753,9 +737,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
}else{
if(t+r<=b+l){
CHECK_SAD_HALF_MV(xy2, +1, -1)
- ptr+= s->linesize;
+ ptr+= stride;
}else{
- ptr+= s->linesize;
+ ptr+= stride;
CHECK_SAD_HALF_MV(xy2, -1, +1)
}
CHECK_SAD_HALF_MV(x2 , +1, 0)
@@ -802,35 +786,41 @@ static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
/**
* get fullpel ME search limits.
- * @param range the approximate search range for the old ME code, unused for EPZS and newer
*/
-static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax)
+static inline void get_limits(MpegEncContext *s, int x, int y)
{
- if(s->avctx->me_range) *range= s->avctx->me_range >> 1;
- else *range= 16;
-
+/*
+ if(s->avctx->me_range) s->me.range= s->avctx->me_range >> 1;
+ else s->me.range= 16;
+*/
if (s->unrestricted_mv) {
- *xmin = -16;
- *ymin = -16;
- *xmax = s->mb_width*16;
- *ymax = s->mb_height*16;
+ s->me.xmin = - x - 16;
+ s->me.ymin = - y - 16;
+ s->me.xmax = - x + s->mb_width *16;
+ s->me.ymax = - y + s->mb_height*16;
} else {
- *xmin = 0;
- *ymin = 0;
- *xmax = s->mb_width*16 - 16;
- *ymax = s->mb_height*16 - 16;
+ s->me.xmin = - x;
+ s->me.ymin = - y;
+ s->me.xmax = - x + s->mb_width *16 - 16;
+ s->me.ymax = - y + s->mb_height*16 - 16;
}
-
- //FIXME try to limit x/y min/max if me_range is set
}
-static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
+static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
+ const int size= 1;
+ const int h=8;
int block;
int P[10][2];
int dmin_sum=0, mx4_sum=0, my4_sum=0;
uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
int same=1;
+ const int stride= s->linesize;
+ const int uvstride= s->uvlinesize;
+ const int xmin= s->me.xmin;
+ const int ymin= s->me.ymin;
+ const int xmax= s->me.xmax;
+ const int ymax= s->me.ymax;
for(block=0; block<4; block++){
int mx4, my4;
@@ -839,23 +829,23 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
static const int off[4]= {2, 1, 1, -1};
const int mot_stride = s->block_wrap[0];
const int mot_xy = s->block_index[block];
-// const int block_x= (block&1);
-// const int block_y= (block>>1);
-#if 1 // this saves us a bit of cliping work and shouldnt affect compression in a negative way
- const int rel_xmin4= xmin;
- const int rel_xmax4= xmax;
- const int rel_ymin4= ymin;
- const int rel_ymax4= ymax;
-#else
- const int rel_xmin4= xmin - block_x*8;
- const int rel_xmax4= xmax - block_x*8 + 8;
- const int rel_ymin4= ymin - block_y*8;
- const int rel_ymax4= ymax - block_y*8 + 8;
-#endif
+ const int block_x= (block&1);
+ const int block_y= (block>>1);
+ uint8_t *src_data[3]= {
+ s->new_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
+ s->new_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
+ s->new_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
+ };
+ uint8_t *ref_data[3]= {
+ s->last_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
+ s->last_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
+ s->last_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
+ };
+
P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
- if(P_LEFT[0] > (rel_xmax4<<shift)) P_LEFT[0] = (rel_xmax4<<shift);
+ if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
/* special case for first line */
if (s->mb_y == 0 && block<2) {
@@ -866,10 +856,10 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
- if(P_TOP[1] > (rel_ymax4<<shift)) P_TOP[1] = (rel_ymax4<<shift);
- if(P_TOPRIGHT[0] < (rel_xmin4<<shift)) P_TOPRIGHT[0]= (rel_xmin4<<shift);
- if(P_TOPRIGHT[0] > (rel_xmax4<<shift)) P_TOPRIGHT[0]= (rel_xmax4<<shift);
- if(P_TOPRIGHT[1] > (rel_ymax4<<shift)) P_TOPRIGHT[1]= (rel_ymax4<<shift);
+ if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1] = (s->me.ymax<<shift);
+ if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
+ if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
+ if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
@@ -887,33 +877,33 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
P_MV1[0]= mx;
P_MV1[1]= my;
- dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
- &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);
+ dmin4 = s->me.motion_search[1](s, &mx4, &my4, P, pred_x4, pred_y4,
+ src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
- dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
- pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);
+ dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4,
+ pred_x4, pred_y4, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
int dxy;
- const int offset= ((block&1) + (block>>1)*s->linesize)*8;
+ const int offset= ((block&1) + (block>>1)*stride)*8;
uint8_t *dest_y = s->me.scratchpad + offset;
if(s->quarter_sample){
- uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset;
+ uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride + offset;
dxy = ((my4 & 3) << 2) | (mx4 & 3);
if(s->no_rounding)
s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize);
else
- s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , s->linesize);
+ s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride);
}else{
- uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset;
+ uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride + offset;
dxy = ((my4 & 1) << 1) | (mx4 & 1);
if(s->no_rounding)
- s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , s->linesize, 8);
+ s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , stride, h);
else
- s->dsp.put_pixels_tab [1][dxy](dest_y , ref , s->linesize, 8);
+ s->dsp.put_pixels_tab [1][dxy](dest_y , ref , stride, h);
}
dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor;
}else
@@ -937,7 +927,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
return INT_MAX;
if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
- dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize);
+ dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, s->me.scratchpad, stride, 16);
}
if(s->avctx->mb_cmp&FF_CMP_CHROMA){
@@ -959,8 +949,8 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8);
}
- dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize);
- dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize);
+ dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize, 8);
+ dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8);
}
switch(s->avctx->mb_cmp&0xFF){
@@ -973,13 +963,134 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma
}
}
+static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint8_t *frame_ref_data[3],
+ int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int f_code, int mx, int my)
+{ /* Field-based search for one macroblock: for each source field (block 0/1) both reference fields are searched, every resulting vector is stored in mv_tables[block][field_select], the better field is stored in field_select_tables[block], and the summed score is returned (INT_MAX if "same" is still set at the end). mx/my are the caller's vector, used as a search candidate and for the "same" test. */
+ const int size=0; // index into the dsp function tables (put_*_pixels_tab / mb_cmp) used below
+ const int h=8; // block height handed to the search and compare functions
+ int block;
+ int P[10][2];
+ uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
+ int same=1; // cleared below once any field's best result is not just (mx, my/2) taken from its own field
+ const int stride= 2*s->linesize; // doubled strides: each row step skips one picture line
+ const int uvstride= 2*s->uvlinesize;
+ int dmin_sum= 0;
+ const int mot_stride= s->mb_stride;
+ const int xy= s->mb_x + s->mb_y*mot_stride;
+
+ s->me.ymin>>=1; // vertical limits are halved while searching (undone after the loop)
+ s->me.ymax>>=1;
+
+ for(block=0; block<2; block++){
+ int field_select;
+ int best_dmin= INT_MAX;
+ int best_field= -1;
+
+ uint8_t *src_data[3]= { // source planes starting at picture line "block" (0 or 1)
+ frame_src_data[0] + s-> linesize*block,
+ frame_src_data[1] + s->uvlinesize*block,
+ frame_src_data[2] + s->uvlinesize*block
+ };
+
+ for(field_select=0; field_select<2; field_select++){
+ int dmin, mx_i, my_i, pred_x, pred_y;
+ uint8_t *ref_data[3]= { // reference planes starting at picture line "field_select" (0 or 1)
+ frame_ref_data[0] + s-> linesize*field_select,
+ frame_ref_data[1] + s->uvlinesize*field_select,
+ frame_ref_data[2] + s->uvlinesize*field_select
+ };
+ int16_t (*mv_table)[2]= mv_tables[block][field_select];
+
+ P_LEFT[0] = mv_table[xy - 1][0];
+ P_LEFT[1] = mv_table[xy - 1][1];
+ if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1);
+
+ pred_x= P_LEFT[0]; // the left neighbour's vector is always used as the predictor here
+ pred_y= P_LEFT[1];
+
+ if(s->mb_y){ // top / top-right / median candidates are only filled in when mb_y is nonzero
+ P_TOP[0] = mv_table[xy - mot_stride][0];
+ P_TOP[1] = mv_table[xy - mot_stride][1];
+ P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
+ P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
+ if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1);
+ if(P_TOPRIGHT[0] < (s->me.xmin<<1)) P_TOPRIGHT[0]= (s->me.xmin<<1);
+ if(P_TOPRIGHT[0] > (s->me.xmax<<1)) P_TOPRIGHT[0]= (s->me.xmax<<1);
+ if(P_TOPRIGHT[1] > (s->me.ymax<<1)) P_TOPRIGHT[1]= (s->me.ymax<<1);
+
+ P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+ P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+ }
+ P_MV1[0]= mx; //FIXME not correct if block != field_select
+ P_MV1[1]= my / 2;
+
+ dmin = s->me.motion_search[4](s, &mx_i, &my_i, P, pred_x, pred_y,
+ src_data, ref_data, stride, uvstride, mv_table, (1<<16)>>1, mv_penalty);
+
+ dmin= s->me.sub_motion_search(s, &mx_i, &my_i, dmin,
+ pred_x, pred_y, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
+
+ mv_table[xy][0]= mx_i; // stored for every block/field_select pair, not only the winning one
+ mv_table[xy][1]= my_i;
+
+ if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ // compare functions differ: rebuild the prediction and rescore it with mb_cmp
+ int dxy;
+
+ //FIXME chroma ME
+ uint8_t *ref= ref_data[0] + (mx_i>>1) + (my_i>>1)*stride; // upper bits of the vector give the pixel offset
+ dxy = ((my_i & 1) << 1) | (mx_i & 1); // low bit of each component selects the put_*_pixels_tab variant
+
+ if(s->no_rounding){
+ s->dsp.put_no_rnd_pixels_tab[size][dxy](s->me.scratchpad, ref , stride, h);
+ }else{
+ s->dsp.put_pixels_tab [size][dxy](s->me.scratchpad, ref , stride, h);
+ }
+ dmin= s->dsp.mb_cmp[size](s, src_data[0], s->me.scratchpad, stride, h);
+ dmin+= (mv_penalty[mx_i-pred_x] + mv_penalty[my_i-pred_y] + 1)*s->me.mb_penalty_factor; // NOTE(review): the "+ 1" presumably mirrors the field_select cost added in the else branch - confirm
+ }else
+ dmin+= s->me.mb_penalty_factor; //field_select bits
+
+ dmin += field_select != block; //slightly prefer same field
+
+ if(dmin < best_dmin){ // strict '<': on a tie the lower field_select is kept
+ best_dmin= dmin;
+ best_field= field_select;
+ }
+ }
+ {
+ int16_t (*mv_table)[2]= mv_tables[block][best_field]; // vector of the winning reference field
+
+ if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
+ if(mv_table[xy][1]&1) same=0;
+ if(mv_table[xy][1]*2 != my) same=0;
+ if(best_field != block) same=0;
+ }
+
+ field_select_tables[block][xy]= best_field;
+ dmin_sum += best_dmin;
+ }
+
+ s->me.ymin<<=1; // undo the halving of the vertical limits done before the loop
+ s->me.ymax<<=1;
+
+ if(same)
+ return INT_MAX; // neither field differed from the vector passed in: report no separate field candidate
+
+ switch(s->avctx->mb_cmp&0xFF){
+ /*case FF_CMP_SSE:
+ return dmin_sum+ 32*s->qscale*s->qscale;*/
+ case FF_CMP_RD:
+ return dmin_sum;
+ default:
+ return dmin_sum+ 11*s->me.mb_penalty_factor; // fixed extra cost of 11 penalty units for every compare mode except RD
+ }
+}
+
void ff_estimate_p_frame_motion(MpegEncContext * s,
int mb_x, int mb_y)
{
uint8_t *pix, *ppix;
- int sum, varc, vard, mx, my, range, dmin, xx, yy;
- int xmin, ymin, xmax, ymax;
- int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
+ int sum, varc, vard, mx, my, dmin, xx, yy;
int pred_x=0, pred_y=0;
int P[10][2];
const int shift= 1+s->quarter_sample;
@@ -987,18 +1098,26 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
uint8_t *ref_picture= s->last_picture.data[0];
Picture * const pic= &s->current_picture;
uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
-
+ const int stride= s->linesize;
+ const int uvstride= s->uvlinesize;
+ uint8_t *src_data[3]= {
+ s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
+ s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
+ s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
+ };
+ uint8_t *ref_data[3]= {
+ s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
+ s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
+ s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
+ };
+
assert(s->quarter_sample==0 || s->quarter_sample==1);
s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
- get_limits(s, &range, &xmin, &ymin, &xmax, &ymax);
- rel_xmin= xmin - mb_x*16;
- rel_xmax= xmax - mb_x*16;
- rel_ymin= ymin - mb_y*16;
- rel_ymax= ymax - mb_y*16;
+ get_limits(s, 16*mb_x, 16*mb_y);
s->me.skip=0;
switch(s->me_method) {
@@ -1009,21 +1128,23 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
my-= mb_y*16;
dmin = 0;
break;
+#if 0
case ME_FULL:
- dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
+ dmin = full_motion_search(s, &mx, &my, range, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_LOG:
- dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+ dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_PHODS:
- dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+ dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
+#endif
case ME_X1:
case ME_EPZS:
{
@@ -1033,16 +1154,16 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
- if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift);
+ if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
if(mb_y) {
P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
- if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1] = (rel_ymax<<shift);
- if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
- if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
+ if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1] = (s->me.ymax<<shift);
+ if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
+ if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
@@ -1060,8 +1181,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
}
}
- dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);
+ dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y,
+ src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
break;
}
@@ -1070,14 +1191,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
xx = mb_x * 16;
yy = mb_y * 16;
- pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
+ pix = src_data[0];
/* At this point (mx,my) are full-pell and the relative displacement */
- ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
+ ppix = ref_data[0] + (my * s->linesize) + mx;
sum = s->dsp.pix_sum(pix, s->linesize);
varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
- vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8;
+ vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8;
//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
pic->mb_var [s->mb_stride * mb_y + mb_x] = varc;
@@ -1099,47 +1220,59 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
s->scene_change_score+= s->qscale;
if (vard*2 + 200 > varc)
- mb_type|= MB_TYPE_INTRA;
+ mb_type|= CANDIDATE_MB_TYPE_INTRA;
if (varc*2 + 200 > vard){
- mb_type|= MB_TYPE_INTER;
- s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
+ mb_type|= CANDIDATE_MB_TYPE_INTER;
+ s->me.sub_motion_search(s, &mx, &my, dmin,
+ pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
if(s->flags&CODEC_FLAG_MV0)
if(mx || my)
- mb_type |= MB_TYPE_SKIPED; //FIXME check difference
+ mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference
}else{
mx <<=shift;
my <<=shift;
}
if((s->flags&CODEC_FLAG_4MV)
&& !s->me.skip && varc>50 && vard>10){
- if(h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift) < INT_MAX)
- mb_type|=MB_TYPE_INTER4V;
+ if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
+ mb_type|=CANDIDATE_MB_TYPE_INTER4V;
set_p_mv_tables(s, mx, my, 0);
}else
set_p_mv_tables(s, mx, my, 1);
+ if((s->flags&CODEC_FLAG_INTERLACED_ME)
+ && !s->me.skip){ //FIXME varc/d checks
+ if(interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my) < INT_MAX)
+ mb_type |= CANDIDATE_MB_TYPE_INTER_I;
+ }
}else{
int intra_score, i;
- mb_type= MB_TYPE_INTER;
+ mb_type= CANDIDATE_MB_TYPE_INTER;
- dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
-
+ dmin= s->me.sub_motion_search(s, &mx, &my, dmin,
+ pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
- dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty);
+ dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty);
if((s->flags&CODEC_FLAG_4MV)
&& !s->me.skip && varc>50 && vard>10){
- int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
+ int dmin4= h263_mv4_search(s, mx, my, shift);
if(dmin4 < dmin){
- mb_type= MB_TYPE_INTER4V;
+ mb_type= CANDIDATE_MB_TYPE_INTER4V;
dmin=dmin4;
}
}
+ if((s->flags&CODEC_FLAG_INTERLACED_ME)
+ && !s->me.skip){ //FIXME varc/d checks
+ int dmin_i= interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my);
+ if(dmin_i < dmin){
+ mb_type = CANDIDATE_MB_TYPE_INTER_I;
+ dmin= dmin_i;
+ }
+ }
// pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
- set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
+ set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);
/* get intra luma score */
if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
@@ -1155,7 +1288,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
*(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
}
- intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize);
+ intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize, 16);
}
#if 0 //FIXME
/* get chroma score */
@@ -1184,8 +1317,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
intra_score += s->me.mb_penalty_factor*16;
if(intra_score < dmin){
- mb_type= MB_TYPE_INTRA;
- s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_INTRA; //FIXME cleanup
+ mb_type= CANDIDATE_MB_TYPE_INTRA;
+ s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
}else
s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;
@@ -1202,30 +1335,36 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
int mb_x, int mb_y)
{
- int mx, my, range, dmin;
- int xmin, ymin, xmax, ymax;
- int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
+ int mx, my, dmin;
int pred_x=0, pred_y=0;
int P[10][2];
const int shift= 1+s->quarter_sample;
uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
const int xy= mb_x + mb_y*s->mb_stride;
+ const int stride= s->linesize;
+ const int uvstride= s->uvlinesize;
+ uint8_t *src_data[3]= {
+ s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
+ s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
+ s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
+ };
+ uint8_t *ref_data[3]= {
+ s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
+ s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
+ s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
+ };
assert(s->quarter_sample==0 || s->quarter_sample==1);
s->me.pre_penalty_factor = get_penalty_factor(s, s->avctx->me_pre_cmp);
- get_limits(s, &range, &xmin, &ymin, &xmax, &ymax);
- rel_xmin= xmin - mb_x*16;
- rel_xmax= xmax - mb_x*16;
- rel_ymin= ymin - mb_y*16;
- rel_ymax= ymax - mb_y*16;
+ get_limits(s, 16*mb_x, 16*mb_y);
s->me.skip=0;
P_LEFT[0] = s->p_mv_table[xy + 1][0];
P_LEFT[1] = s->p_mv_table[xy + 1][1];
- if(P_LEFT[0] < (rel_xmin<<shift)) P_LEFT[0] = (rel_xmin<<shift);
+ if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift);
/* special case for first line */
if (mb_y == s->mb_height-1) {
@@ -1238,9 +1377,9 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
P_TOP[1] = s->p_mv_table[xy + s->mb_stride ][1];
P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
- if(P_TOP[1] < (rel_ymin<<shift)) P_TOP[1] = (rel_ymin<<shift);
- if(P_TOPRIGHT[0] > (rel_xmax<<shift)) P_TOPRIGHT[0]= (rel_xmax<<shift);
- if(P_TOPRIGHT[1] < (rel_ymin<<shift)) P_TOPRIGHT[1]= (rel_ymin<<shift);
+ if(P_TOP[1] < (s->me.ymin<<shift)) P_TOP[1] = (s->me.ymin<<shift);
+ if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
+ if(P_TOPRIGHT[1] < (s->me.ymin<<shift)) P_TOPRIGHT[1]= (s->me.ymin<<shift);
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
@@ -1248,8 +1387,8 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
pred_x = P_MEDIAN[0];
pred_y = P_MEDIAN[1];
}
- dmin = s->me.pre_motion_search(s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);
+ dmin = s->me.pre_motion_search(s, &mx, &my, P, pred_x, pred_y,
+ src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
s->p_mv_table[xy][0] = mx<<shift;
s->p_mv_table[xy][1] = my<<shift;
@@ -1258,17 +1397,16 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
}
static int ff_estimate_motion_b(MpegEncContext * s,
- int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code)
+ int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride, int f_code)
{
- int mx, my, range, dmin;
- int xmin, ymin, xmax, ymax;
- int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
+ int mx, my, dmin;
int pred_x=0, pred_y=0;
int P[10][2];
const int shift= 1+s->quarter_sample;
const int mot_stride = s->mb_stride;
const int mot_xy = mb_y*mot_stride + mb_x;
- uint8_t * const ref_picture= picture->data[0];
+ uint8_t * const ref_picture= ref_data[0] - 16*s->mb_x - 16*s->mb_y*s->linesize; //FIXME ugly
uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
int mv_scale;
@@ -1276,11 +1414,7 @@ static int ff_estimate_motion_b(MpegEncContext * s,
s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
- get_limits(s, &range, &xmin, &ymin, &xmax, &ymax);
- rel_xmin= xmin - mb_x*16;
- rel_xmax= xmax - mb_x*16;
- rel_ymin= ymin - mb_y*16;
- rel_ymax= ymax - mb_y*16;
+ get_limits(s, 16*mb_x, 16*mb_y);
switch(s->me_method) {
case ME_ZERO:
@@ -1290,28 +1424,30 @@ static int ff_estimate_motion_b(MpegEncContext * s,
mx-= mb_x*16;
my-= mb_y*16;
break;
+#if 0
case ME_FULL:
- dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
+ dmin = full_motion_search(s, &mx, &my, range, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_LOG:
- dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+ dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_PHODS:
- dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+ dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
+#endif
case ME_X1:
case ME_EPZS:
{
P_LEFT[0] = mv_table[mot_xy - 1][0];
P_LEFT[1] = mv_table[mot_xy - 1][1];
- if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift);
+ if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
/* special case for first line */
if (mb_y) {
@@ -1319,9 +1455,9 @@ static int ff_estimate_motion_b(MpegEncContext * s,
P_TOP[1] = mv_table[mot_xy - mot_stride ][1];
P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0];
P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1];
- if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1]= (rel_ymax<<shift);
- if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
- if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
+ if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1]= (s->me.ymax<<shift);
+ if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
+ if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
@@ -1336,17 +1472,17 @@ static int ff_estimate_motion_b(MpegEncContext * s,
mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
}
- dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- picture, s->p_mv_table, mv_scale, mv_penalty);
+ dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y,
+ src_data, ref_data, stride, uvstride, s->p_mv_table, mv_scale, mv_penalty);
break;
}
- dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, picture, 0, 0, mv_penalty);
+ dmin= s->me.sub_motion_search(s, &mx, &my, dmin,
+ pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
- dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty);
+ dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty);
//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
// s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
@@ -1356,16 +1492,18 @@ static int ff_estimate_motion_b(MpegEncContext * s,
return dmin;
}
-static inline int check_bidir_mv(MpegEncContext * s,
- int mb_x, int mb_y,
+static inline int check_bidir_mv(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
+ int stride, int uvstride,
int motion_fx, int motion_fy,
int motion_bx, int motion_by,
int pred_fx, int pred_fy,
- int pred_bx, int pred_by)
+ int pred_bx, int pred_by,
+ int size, int h)
{
//FIXME optimize?
//FIXME move into template?
//FIXME better f_code prediction (max mv & distance)
+ //FIXME pointers
uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
uint8_t *dest_y = s->me.scratchpad;
uint8_t *ptr;
@@ -1375,45 +1513,37 @@ static inline int check_bidir_mv(MpegEncContext * s,
if(s->quarter_sample){
dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
- src_x = mb_x * 16 + (motion_fx >> 2);
- src_y = mb_y * 16 + (motion_fy >> 2);
- assert(src_x >=-16 && src_x<=s->h_edge_pos);
- assert(src_y >=-16 && src_y<=s->v_edge_pos);
+ src_x = motion_fx >> 2;
+ src_y = motion_fy >> 2;
- ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
- s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
+ ptr = ref_data[0] + (src_y * stride) + src_x;
+ s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , stride);
dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
- src_x = mb_x * 16 + (motion_bx >> 2);
- src_y = mb_y * 16 + (motion_by >> 2);
- assert(src_x >=-16 && src_x<=s->h_edge_pos);
- assert(src_y >=-16 && src_y<=s->v_edge_pos);
+ src_x = motion_bx >> 2;
+ src_y = motion_by >> 2;
- ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
- s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
+ ptr = ref_data[3] + (src_y * stride) + src_x;
+ s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride);
}else{
dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
- src_x = mb_x * 16 + (motion_fx >> 1);
- src_y = mb_y * 16 + (motion_fy >> 1);
- assert(src_x >=-16 && src_x<=s->h_edge_pos);
- assert(src_y >=-16 && src_y<=s->v_edge_pos);
+ src_x = motion_fx >> 1;
+ src_y = motion_fy >> 1;
- ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
- s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
+ ptr = ref_data[0] + (src_y * stride) + src_x;
+ s->dsp.put_pixels_tab[size][dxy](dest_y , ptr , stride, h);
dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
- src_x = mb_x * 16 + (motion_bx >> 1);
- src_y = mb_y * 16 + (motion_by >> 1);
- assert(src_x >=-16 && src_x<=s->h_edge_pos);
- assert(src_y >=-16 && src_y<=s->v_edge_pos);
+ src_x = motion_bx >> 1;
+ src_y = motion_by >> 1;
- ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
- s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
+ ptr = ref_data[3] + (src_y * stride) + src_x;
+ s->dsp.avg_pixels_tab[size][dxy](dest_y , ptr , stride, h);
}
fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor
+(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor
- + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
+ + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic
if(s->avctx->mb_cmp&FF_CMP_CHROMA){
}
@@ -1423,7 +1553,8 @@ static inline int check_bidir_mv(MpegEncContext * s,
}
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
-static inline int bidir_refine(MpegEncContext * s,
+static inline int bidir_refine(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
+ int stride, int uvstride,
int mb_x, int mb_y)
{
const int mot_stride = s->mb_stride;
@@ -1440,16 +1571,18 @@ static inline int bidir_refine(MpegEncContext * s,
//FIXME do refinement and add flag
- fbmin= check_bidir_mv(s, mb_x, mb_y,
+ fbmin= check_bidir_mv(s, src_data, ref_data, stride, uvstride,
motion_fx, motion_fy,
motion_bx, motion_by,
pred_fx, pred_fy,
- pred_bx, pred_by);
+ pred_bx, pred_by,
+ 0, 16);
return fbmin;
}
-static inline int direct_search(MpegEncContext * s,
+static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
+ int stride, int uvstride,
int mb_x, int mb_y)
{
int P[10][2];
@@ -1508,6 +1641,11 @@ static inline int direct_search(MpegEncContext * s,
return 256*256*256*64;
}
+
+ s->me.xmin= xmin;
+ s->me.ymin= ymin;
+ s->me.xmax= xmax;
+ s->me.ymax= ymax;
P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);
@@ -1525,22 +1663,24 @@ static inline int direct_search(MpegEncContext * s,
//FIXME direct_search ptr in context!!! (needed for chroma anyway or this will get messy)
if(s->flags&CODEC_FLAG_QPEL){
- dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
- &s->last_picture, mv_table, 1<<14, mv_penalty);
- dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
- 0, 0, &s->last_picture, 0, 0, mv_penalty);
+ dmin = simple_direct_qpel_epzs_motion_search(s, &mx, &my, P, 0, 0,
+ src_data, ref_data, stride, uvstride, mv_table, 1<<14, mv_penalty);
+ dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin,
+ 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
- dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty);
+ dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
}else{
- dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
- &s->last_picture, mv_table, 1<<15, mv_penalty);
- dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
- 0, 0, &s->last_picture, 0, 0, mv_penalty);
+ dmin = simple_direct_hpel_epzs_motion_search(s, &mx, &my, P, 0, 0,
+ src_data, ref_data, stride, uvstride, mv_table, 1<<15, mv_penalty);
+ dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin,
+ 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
- dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty);
+ dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
}
+
+ get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed
s->b_direct_mv_table[mot_xy][0]= mx;
s->b_direct_mv_table[mot_xy][1]= my;
@@ -1551,40 +1691,80 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
int mb_x, int mb_y)
{
const int penalty_factor= s->me.mb_penalty_factor;
- int fmin, bmin, dmin, fbmin;
+ int fmin, bmin, dmin, fbmin, bimin, fimin;
int type=0;
+ const int stride= s->linesize;
+ const int uvstride= s->uvlinesize;
+ uint8_t *src_data[3]= {
+ s->new_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
+ s->new_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
+ s->new_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
+ };
+ uint8_t *ref_data[6]= {
+ s->last_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
+ s->last_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
+ s->last_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y),
+ s->next_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
+ s->next_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
+ s->next_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
+ };
s->me.skip=0;
if (s->codec_id == CODEC_ID_MPEG4)
- dmin= direct_search(s, mb_x, mb_y);
+ dmin= direct_search(s, src_data, ref_data, stride, uvstride, mb_x, mb_y);
else
dmin= INT_MAX;
-
+//FIXME penalty stuff for non mpeg4
s->me.skip=0;
- fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor;
+ fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, src_data,
+ ref_data, stride, uvstride, s->f_code) + 3*penalty_factor;
s->me.skip=0;
- bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor;
+ bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, src_data,
+ ref_data+3, stride, uvstride, s->b_code) + 2*penalty_factor;
//printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
s->me.skip=0;
- fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
+ fbmin= bidir_refine(s, src_data, ref_data, stride, uvstride, mb_x, mb_y) + penalty_factor;
//printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin);
+
+ if(s->flags & CODEC_FLAG_INTERLACED_ME){
+ const int xy = mb_y*s->mb_stride + mb_x;
+
+//FIXME mb type penalty
+ s->me.skip=0;
+ fimin= interlaced_search(s, src_data, ref_data ,
+ s->b_field_mv_table[0], s->b_field_select_table[0], s->f_code,
+ s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
+ bimin= interlaced_search(s, src_data, ref_data+3,
+ s->b_field_mv_table[1], s->b_field_select_table[1], s->b_code,
+ s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1]);
+ }else
+ fimin= bimin= INT_MAX;
+
{
int score= fmin;
- type = MB_TYPE_FORWARD;
+ type = CANDIDATE_MB_TYPE_FORWARD;
if (dmin <= score){
score = dmin;
- type = MB_TYPE_DIRECT;
+ type = CANDIDATE_MB_TYPE_DIRECT;
}
if(bmin<score){
score=bmin;
- type= MB_TYPE_BACKWARD;
+ type= CANDIDATE_MB_TYPE_BACKWARD;
}
if(fbmin<score){
score=fbmin;
- type= MB_TYPE_BIDIR;
+ type= CANDIDATE_MB_TYPE_BIDIR;
+ }
+ if(fimin<score){
+ score=fimin;
+ type= CANDIDATE_MB_TYPE_FORWARD_I;
+ }
+ if(bimin<score){
+ score=bimin;
+ type= CANDIDATE_MB_TYPE_BACKWARD_I;
}
score= ((unsigned)(score*score + 128*256))>>16;
@@ -1593,8 +1773,16 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
}
if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
- type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
- if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
+ type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
+ if(fimin < INT_MAX)
+ type |= CANDIDATE_MB_TYPE_FORWARD_I;
+ if(bimin < INT_MAX)
+ type |= CANDIDATE_MB_TYPE_BACKWARD_I;
+ if(fimin < INT_MAX && bimin < INT_MAX){
+ type |= CANDIDATE_MB_TYPE_BIDIR_I;
+ }
+ //FIXME something smarter
+ if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //don't try direct mode if it's invalid for this MB
}
s->mb_type[mb_y*s->mb_stride + mb_x]= type;
@@ -1661,24 +1849,6 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range;
- /* clip / convert to intra 16x16 type MVs */
- for(y=0; y<s->mb_height; y++){
- int x;
- int xy= y*s->mb_stride;
- for(x=0; x<s->mb_width; x++){
- if(s->mb_type[xy]&MB_TYPE_INTER){
- if( s->p_mv_table[xy][0] >=range || s->p_mv_table[xy][0] <-range
- || s->p_mv_table[xy][1] >=range || s->p_mv_table[xy][1] <-range){
- s->mb_type[xy] &= ~MB_TYPE_INTER;
- s->mb_type[xy] |= MB_TYPE_INTRA;
- s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
- s->p_mv_table[xy][0] = 0;
- s->p_mv_table[xy][1] = 0;
- }
- }
- xy++;
- }
- }
//printf("%d no:%d %d//\n", clip, noclip, f_code);
if(s->flags&CODEC_FLAG_4MV){
const int wrap= 2+ s->mb_width*2;
@@ -1690,7 +1860,7 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
int x;
for(x=0; x<s->mb_width; x++){
- if(s->mb_type[i]&MB_TYPE_INTER4V){
+ if(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V){
int block;
for(block=0; block<4; block++){
int off= (block& 1) + (block>>1)*wrap;
@@ -1699,9 +1869,9 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
if( mx >=range || mx <-range
|| my >=range || my <-range){
- s->mb_type[i] &= ~MB_TYPE_INTER4V;
- s->mb_type[i] |= MB_TYPE_INTRA;
- s->current_picture.mb_type[i]= MB_TYPE_INTRA;
+ s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V;
+ s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA;
+ s->current_picture.mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
}
}
}
@@ -1712,30 +1882,45 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
}
}
-void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type)
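+/**
+ * Clips too-long motion vectors of the given MB type into range, or marks those MBs as intra.
+ * @param truncate 1 to clip out-of-range vectors, 0 to convert the macroblock to intra instead
+ */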
+void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
+ int16_t (*mv_table)[2], int f_code, int type, int truncate)
{
- int y;
+ int y, h_range, v_range;
// RAL: 8 in MPEG-1, 16 in MPEG-4
int range = (((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code);
-
+
+ if(s->msmpeg4_version) range= 16;
if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range;
+ h_range= range;
+ v_range= field_select_table ? range>>1 : range;
+
/* clip / convert to intra 16x16 type MVs */
for(y=0; y<s->mb_height; y++){
int x;
int xy= y*s->mb_stride;
for(x=0; x<s->mb_width; x++){
if (s->mb_type[xy] & type){ // RAL: "type" test added...
- if( mv_table[xy][0] >=range || mv_table[xy][0] <-range
- || mv_table[xy][1] >=range || mv_table[xy][1] <-range){
-
- if(s->codec_id == CODEC_ID_MPEG1VIDEO && 0){
- }else{
- if (mv_table[xy][0] > range-1) mv_table[xy][0]= range-1;
- else if(mv_table[xy][0] < -range ) mv_table[xy][0]= -range;
- if (mv_table[xy][1] > range-1) mv_table[xy][1]= range-1;
- else if(mv_table[xy][1] < -range ) mv_table[xy][1]= -range;
+ if(field_select_table==NULL || field_select_table[xy] == field_select){
+ if( mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
+ || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){
+
+ if(truncate){
+ if (mv_table[xy][0] > h_range-1) mv_table[xy][0]= h_range-1;
+ else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
+ if (mv_table[xy][1] > v_range-1) mv_table[xy][1]= v_range-1;
+ else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
+ }else{
+ s->mb_type[xy] &= ~type;
+ s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
+ mv_table[xy][0]=
+ mv_table[xy][1]= 0;
+ }
}
}
}
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index db51d676db..0f3a6b4cdd 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -22,29 +22,31 @@
* @file motion_est_template.c
* Motion estimation template.
*/
-
+//FIXME ref2_y next_pic?
//lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
//Note, the last line is there to kill these ugly unused var warnings
-#define LOAD_COMMON(x, y)\
+#define LOAD_COMMON\
uint32_t * const score_map= s->me.score_map;\
- const int stride= s->linesize;\
- const int uvstride= s->uvlinesize;\
const int time_pp= s->pp_time;\
const int time_pb= s->pb_time;\
- uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\
- uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
- uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
- uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\
- uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
- uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
- uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\
+ const int xmin= s->me.xmin;\
+ const int ymin= s->me.ymin;\
+ const int xmax= s->me.xmax;\
+ const int ymax= s->me.ymax;\
+ uint8_t * const src_y= src_data[0];\
+ uint8_t * const src_u= src_data[1];\
+ uint8_t * const src_v= src_data[2];\
+ uint8_t * const ref_y= ref_data[0];\
+ uint8_t * const ref_u= ref_data[1];\
+ uint8_t * const ref_v= ref_data[2];\
op_pixels_func (*hpel_put)[4];\
op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
op_pixels_func (*chroma_hpel_put)[4];\
qpel_mc_func (*qpel_put)[16];\
qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\
- + (size_t)ref2_y + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map;\
+ + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map\
+ + xmin + xmax + ymin + ymax;\
if(s->no_rounding /*FIXME b_type*/){\
hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
@@ -70,9 +72,8 @@
#if 0
static int RENAME(hpel_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, Picture *ref_picture,
- int n, int size, uint8_t * const mv_penalty)
+ int pred_x, int pred_y, uint8_t *ref_data[3],
+ int size, uint8_t * const mv_penalty)
{
const int xx = 16 * s->mb_x + 8*(n&1);
const int yy = 16 * s->mb_y + 8*(n>>1);
@@ -80,7 +81,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
const int my = *my_ptr;
const int penalty_factor= s->me.sub_penalty_factor;
- LOAD_COMMON(xx, yy);
+ LOAD_COMMON
// INIT;
//FIXME factorize
@@ -139,19 +140,17 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
#else
static int RENAME(hpel_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, Picture *ref_picture,
- int n, int size, uint8_t * const mv_penalty)
+ int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
+ int size, int h, uint8_t * const mv_penalty)
{
- const int xx = 16 * s->mb_x + 8*(n&1);
- const int yy = 16 * s->mb_y + 8*(n>>1);
const int mx = *mx_ptr;
const int my = *my_ptr;
const int penalty_factor= s->me.sub_penalty_factor;
me_cmp_func cmp_sub, chroma_cmp_sub;
int bx=2*mx, by=2*my;
- LOAD_COMMON(xx, yy);
+ LOAD_COMMON
//FIXME factorize
@@ -247,20 +246,18 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
}
#endif
-static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture,
+static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
uint8_t * const mv_penalty)
{
// const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
const int size= 0;
- const int xx = 16 * s->mb_x;
- const int yy = 16 * s->mb_y;
+ const int h= 16;
const int penalty_factor= s->me.mb_penalty_factor;
- const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these
- const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit
me_cmp_func cmp_sub, chroma_cmp_sub;
int d;
- LOAD_COMMON(xx, yy);
+ LOAD_COMMON
//FIXME factorize
@@ -295,12 +292,10 @@ static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre
static int RENAME(qpel_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, Picture *ref_picture,
- int n, int size, uint8_t * const mv_penalty)
+ int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
+ int size, int h, uint8_t * const mv_penalty)
{
- const int xx = 16 * s->mb_x + 8*(n&1);
- const int yy = 16 * s->mb_y + 8*(n>>1);
const int mx = *mx_ptr;
const int my = *my_ptr;
const int penalty_factor= s->me.sub_penalty_factor;
@@ -310,7 +305,7 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
me_cmp_func cmp, chroma_cmp;
me_cmp_func cmp_sub, chroma_cmp_sub;
- LOAD_COMMON(xx, yy);
+ LOAD_COMMON
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME
@@ -514,19 +509,17 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
return dmin;
}
-static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture,
+static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
uint8_t * const mv_penalty)
{
const int size= 0;
- const int xx = 16 * s->mb_x;
- const int yy = 16 * s->mb_y;
+ const int h= 16;
const int penalty_factor= s->me.mb_penalty_factor;
- const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these
- const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit
me_cmp_func cmp_sub, chroma_cmp_sub;
int d;
- LOAD_COMMON(xx, yy);
+ LOAD_COMMON
//FIXME factorize
@@ -597,15 +590,16 @@ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x,
static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin,
- Picture *ref_picture,
+ uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
int const pred_x, int const pred_y, int const penalty_factor,
- int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
- uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
+ int const shift,
+ uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{
me_cmp_func cmp, chroma_cmp;
int next_dir=-1;
- LOAD_COMMON(s->mb_x*16, s->mb_y*16);
+ LOAD_COMMON
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
@@ -639,15 +633,16 @@ static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, in
}
static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin,
- Picture *ref_picture,
+ uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
int const pred_x, int const pred_y, int const penalty_factor,
- int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
- uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
+ int const shift,
+ uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{
me_cmp_func cmp, chroma_cmp;
int dia_size;
- LOAD_COMMON(s->mb_x*16, s->mb_y*16);
+ LOAD_COMMON
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
@@ -730,17 +725,18 @@ if(256*256*256*64 % (stats[0]+1)==0){
#define MAX_SAB_SIZE 16
static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin,
- Picture *ref_picture,
+ uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
int const pred_x, int const pred_y, int const penalty_factor,
- int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
- uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
+ int const shift,
+ uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{
me_cmp_func cmp, chroma_cmp;
Minima minima[MAX_SAB_SIZE];
const int minima_count= ABS(s->me.dia_size);
int i, j;
- LOAD_COMMON(s->mb_x*16, s->mb_y*16);
+ LOAD_COMMON
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
@@ -810,15 +806,16 @@ static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int
}
static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin,
- Picture *ref_picture,
+ uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride,
int const pred_x, int const pred_y, int const penalty_factor,
- int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
- uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty
+ int const shift,
+ uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{
me_cmp_func cmp, chroma_cmp;
int dia_size;
- LOAD_COMMON(s->mb_x*16, s->mb_y*16);
+ LOAD_COMMON
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
@@ -886,10 +883,10 @@ if(256*256*256*64 % (stats[0]+1)==0){
return dmin;
}
-static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
+static int RENAME(epzs_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr,
- int P[10][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2],
+ int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2],
int ref_mv_scale, uint8_t * const mv_penalty)
{
int best[2]={0, 0};
@@ -899,10 +896,11 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
int map_generation;
const int penalty_factor= s->me.penalty_factor;
const int size=0;
- const int ref_mv_stride= s->mb_stride;
- const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride;
+ const int h=16;
+ const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
+ const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
me_cmp_func cmp, chroma_cmp;
- LOAD_COMMON(s->mb_x*16, s->mb_y*16);
+ LOAD_COMMON
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
@@ -973,21 +971,21 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
//check(best[0],best[1],0, b0)
if(s->me.dia_size==-1)
- dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<-1)
- dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<2)
- dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
else
- dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
//check(best[0],best[1],0, b1)
*mx_ptr= best[0];
@@ -998,10 +996,11 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
}
#ifndef CMP_DIRECT /* no 4mv search needed in direct mode */
-static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
+static int RENAME(epzs_motion_search4)(MpegEncContext * s,
int *mx_ptr, int *my_ptr,
int P[10][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2],
+ uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2],
int ref_mv_scale, uint8_t * const mv_penalty)
{
int best[2]={0, 0};
@@ -1011,10 +1010,11 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
int map_generation;
const int penalty_factor= s->me.penalty_factor;
const int size=1;
+ const int h=8;
const int ref_mv_stride= s->mb_stride;
const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
me_cmp_func cmp, chroma_cmp;
- LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8);
+ LOAD_COMMON
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
@@ -1024,7 +1024,7 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
- if (s->mb_y == 0 && block<2) {
+ if (s->mb_y == 0/* && block<2*/) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
@@ -1049,21 +1049,100 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
}
if(s->me.dia_size==-1)
- dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<-1)
- dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<2)
- dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
else
- dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
- pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
- shift, map, map_generation, size, mv_penalty);
+ dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
+
+
+ *mx_ptr= best[0];
+ *my_ptr= best[1];
+
+// printf("%d %d %d \n", best[0], best[1], dmin);
+ return dmin;
+}
+
+//try to merge with above FIXME (needs PSNR test)
+static int RENAME(epzs_motion_search2)(MpegEncContext * s,
+ int *mx_ptr, int *my_ptr,
+ int P[10][2], int pred_x, int pred_y,
+ uint8_t *src_data[3],
+ uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2],
+ int ref_mv_scale, uint8_t * const mv_penalty)
+{
+ int best[2]={0, 0};
+ int d, dmin;
+ const int shift= 1+s->quarter_sample;
+ uint32_t *map= s->me.map;
+ int map_generation;
+ const int penalty_factor= s->me.penalty_factor;
+ const int size=0; //FIXME pass as arg
+ const int h=8;
+ const int ref_mv_stride= s->mb_stride;
+ const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
+ me_cmp_func cmp, chroma_cmp;
+ LOAD_COMMON
+
+ cmp= s->dsp.me_cmp[size];
+ chroma_cmp= s->dsp.me_cmp[size+1];
+
+ map_generation= update_map_generation(s);
+
+ dmin = 1000000;
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
+ /* first line */
+ if (s->mb_y == 0) {
+ CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+ CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+ }else{
+ CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+ //FIXME try some early stop
+ if(dmin>64*2){
+ CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
+ CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
+ CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+ CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+ }
+ }
+ if(dmin>64*4){
+ CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
+ (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
+ CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+ (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
+ }
+
+ if(s->me.dia_size==-1)
+ dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
+ else if(s->me.dia_size<-1)
+ dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
+ else if(s->me.dia_size<2)
+ dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
+ else
+ dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
+ pred_x, pred_y, penalty_factor,
+ shift, map, map_generation, size, h, mv_penalty);
+
*mx_ptr= best[0];
*my_ptr= best[1];
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 4cc164de31..81cd9619c1 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -29,6 +29,9 @@
#include "mpeg12data.h"
+//#undef NDEBUG
+//#include <assert.h>
+
/* Start codes. */
#define SEQ_END_CODE 0x000001b7
@@ -476,12 +479,12 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
}
static inline void put_mb_modes(MpegEncContext *s, int n, int bits,
- int has_mv)
+ int has_mv, int field_motion)
{
put_bits(&s->pb, n, bits);
if (!s->frame_pred_frame_dct) {
if (has_mv)
- put_bits(&s->pb, 2, 2); /* motion_type: frame */
+ put_bits(&s->pb, 2, 2 - field_motion); /* motion_type: frame/field */
put_bits(&s->pb, 1, s->interlaced_dct);
}
}
@@ -501,9 +504,9 @@ void mpeg1_encode_mb(MpegEncContext *s,
if (s->block_last_index[i] >= 0)
cbp |= 1 << (5 - i);
}
-
+
if (cbp == 0 && !first_mb && (mb_x != s->mb_width - 1 || (mb_y != s->mb_height - 1 && s->codec_id == CODEC_ID_MPEG1VIDEO)) &&
- ((s->pict_type == P_TYPE && (motion_x | motion_y) == 0) ||
+ ((s->pict_type == P_TYPE && s->mv_type == MV_TYPE_16X16 && (motion_x | motion_y) == 0) ||
(s->pict_type == B_TYPE && s->mv_dir == s->last_mv_dir && (((s->mv_dir & MV_DIR_FORWARD) ? ((s->mv[0][0][0] - s->last_mv[0][0][0])|(s->mv[0][0][1] - s->last_mv[0][0][1])) : 0) |
((s->mv_dir & MV_DIR_BACKWARD) ? ((s->mv[1][0][0] - s->last_mv[1][0][0])|(s->mv[1][0][1] - s->last_mv[1][0][1])) : 0)) == 0))) {
s->mb_skip_run++;
@@ -511,6 +514,10 @@ void mpeg1_encode_mb(MpegEncContext *s,
s->skip_count++;
s->misc_bits++;
s->last_bits++;
+ if(s->pict_type == P_TYPE){
+ s->last_mv[0][1][0]= s->last_mv[0][0][0]=
+ s->last_mv[0][1][1]= s->last_mv[0][0][1]= 0;
+ }
} else {
if(first_mb){
assert(s->mb_skip_run == 0);
@@ -521,150 +528,167 @@ void mpeg1_encode_mb(MpegEncContext *s,
if (s->pict_type == I_TYPE) {
if(s->dquant && cbp){
- put_mb_modes(s, 2, 1, 0); /* macroblock_type : macroblock_quant = 1 */
+ put_mb_modes(s, 2, 1, 0, 0); /* macroblock_type : macroblock_quant = 1 */
put_bits(&s->pb, 5, s->qscale);
}else{
- put_mb_modes(s, 1, 1, 0); /* macroblock_type : macroblock_quant = 0 */
+ put_mb_modes(s, 1, 1, 0, 0); /* macroblock_type : macroblock_quant = 0 */
s->qscale -= s->dquant;
}
s->misc_bits+= get_bits_diff(s);
s->i_count++;
} else if (s->mb_intra) {
if(s->dquant && cbp){
- put_mb_modes(s, 6, 0x01, 0);
+ put_mb_modes(s, 6, 0x01, 0, 0);
put_bits(&s->pb, 5, s->qscale);
}else{
- put_mb_modes(s, 5, 0x03, 0);
+ put_mb_modes(s, 5, 0x03, 0, 0);
s->qscale -= s->dquant;
}
s->misc_bits+= get_bits_diff(s);
s->i_count++;
- s->last_mv[0][0][0] =
- s->last_mv[0][0][1] = 0;
+ memset(s->last_mv, 0, sizeof(s->last_mv));
} else if (s->pict_type == P_TYPE) {
+ if(s->mv_type == MV_TYPE_16X16){
if (cbp != 0) {
- if (motion_x == 0 && motion_y == 0) {
+ if ((motion_x|motion_y) == 0) {
if(s->dquant){
- put_mb_modes(s, 5, 1, 0); /* macroblock_pattern & quant */
+ put_mb_modes(s, 5, 1, 0, 0); /* macroblock_pattern & quant */
put_bits(&s->pb, 5, s->qscale);
}else{
- put_mb_modes(s, 2, 1, 0); /* macroblock_pattern only */
+ put_mb_modes(s, 2, 1, 0, 0); /* macroblock_pattern only */
}
s->misc_bits+= get_bits_diff(s);
- put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
} else {
if(s->dquant){
- put_mb_modes(s, 5, 2, 1); /* motion + cbp */
+ put_mb_modes(s, 5, 2, 1, 0); /* motion + cbp */
put_bits(&s->pb, 5, s->qscale);
}else{
- put_mb_modes(s, 1, 1, 1); /* motion + cbp */
+ put_mb_modes(s, 1, 1, 1, 0); /* motion + cbp */
}
s->misc_bits+= get_bits_diff(s);
mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); // RAL: f_code parameter added
mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); // RAL: f_code parameter added
s->mv_bits+= get_bits_diff(s);
- put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
}
} else {
put_bits(&s->pb, 3, 1); /* motion only */
if (!s->frame_pred_frame_dct)
put_bits(&s->pb, 2, 2); /* motion_type: frame */
+ s->misc_bits+= get_bits_diff(s);
mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); // RAL: f_code parameter added
mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); // RAL: f_code parameter added
s->qscale -= s->dquant;
s->mv_bits+= get_bits_diff(s);
}
- s->f_count++;
- } else
- { // RAL: All the following bloc added for B frames:
- if (cbp != 0)
- { // With coded bloc pattern
- if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD))
- { // Bi-directional motion
- if (s->dquant) {
- put_mb_modes(s, 5, 2, 1);
- put_bits(&s->pb, 5, s->qscale);
- } else {
- put_mb_modes(s, 2, 3, 1);
- }
- s->misc_bits += get_bits_diff(s);
- mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
- mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
- mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
- mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
- s->b_count++;
- s->f_count++;
- s->mv_bits += get_bits_diff(s);
- put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
- }
- else if (s->mv_dir == MV_DIR_BACKWARD)
- { // Backward motion
- if (s->dquant) {
- put_mb_modes(s, 6, 2, 1);
- put_bits(&s->pb, 5, s->qscale);
- } else {
- put_mb_modes(s, 3, 3, 1);
- }
- s->misc_bits += get_bits_diff(s);
- mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
- mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
- s->b_count++;
- s->mv_bits += get_bits_diff(s);
- put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
- }
- else if (s->mv_dir == MV_DIR_FORWARD)
- { // Forward motion
- if (s->dquant) {
- put_mb_modes(s, 6, 3, 1);
- put_bits(&s->pb, 5, s->qscale);
- } else {
- put_mb_modes(s, 4, 3, 1);
- }
- s->misc_bits += get_bits_diff(s);
- mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
- mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
- s->f_count++;
- s->mv_bits += get_bits_diff(s);
- put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
- }
+ s->last_mv[0][1][0]= s->last_mv[0][0][0]= motion_x;
+ s->last_mv[0][1][1]= s->last_mv[0][0][1]= motion_y;
+ }else{
+ assert(!s->frame_pred_frame_dct && s->mv_type == MV_TYPE_FIELD);
+
+ if (cbp) {
+ if(s->dquant){
+ put_mb_modes(s, 5, 2, 1, 1); /* motion + cbp */
+ put_bits(&s->pb, 5, s->qscale);
+ }else{
+ put_mb_modes(s, 1, 1, 1, 1); /* motion + cbp */
}
- else
- { // No coded bloc pattern
- if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD))
- { // Bi-directional motion
- put_bits(&s->pb, 2, 2); /* backward & forward motion */
- if (!s->frame_pred_frame_dct)
- put_bits(&s->pb, 2, 2); /* motion_type: frame */
- mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
- mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
- mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
- mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
- s->b_count++;
- s->f_count++;
- }
- else if (s->mv_dir == MV_DIR_BACKWARD)
- { // Backward motion
- put_bits(&s->pb, 3, 2); /* backward motion only */
- if (!s->frame_pred_frame_dct)
- put_bits(&s->pb, 2, 2); /* motion_type: frame */
- mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
- mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
- s->b_count++;
- }
- else if (s->mv_dir == MV_DIR_FORWARD)
- { // Forward motion
- put_bits(&s->pb, 4, 2); /* forward motion only */
- if (!s->frame_pred_frame_dct)
- put_bits(&s->pb, 2, 2); /* motion_type: frame */
- mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
- mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
- s->f_count++;
- }
+ } else {
+ put_bits(&s->pb, 3, 1); /* motion only */
+ put_bits(&s->pb, 2, 1); /* motion_type: field */
s->qscale -= s->dquant;
- s->mv_bits += get_bits_diff(s);
+ }
+ s->misc_bits+= get_bits_diff(s);
+ for(i=0; i<2; i++){
+ put_bits(&s->pb, 1, s->field_select[0][i]);
+ mpeg1_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code);
+ mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code);
+ s->last_mv[0][i][0]= s->mv[0][i][0];
+ s->last_mv[0][i][1]= 2*s->mv[0][i][1];
+ }
+ s->mv_bits+= get_bits_diff(s);
+ }
+ if(cbp)
+ put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
+ s->f_count++;
+ } else{
+ static const int mb_type_len[4]={0,3,4,2}; //code length indexed by mv_dir: backward, forward, bidirectional
+
+ if(s->mv_type == MV_TYPE_16X16){
+ if (cbp){ // With coded block pattern
+ if (s->dquant) {
+ if(s->mv_dir == MV_DIR_FORWARD)
+ put_mb_modes(s, 6, 3, 1, 0);
+ else
+ put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 0);
+ put_bits(&s->pb, 5, s->qscale);
+ } else {
+ put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 0);
+ }
+ }else{ // No coded block pattern
+ put_bits(&s->pb, mb_type_len[s->mv_dir], 2);
+ if (!s->frame_pred_frame_dct)
+ put_bits(&s->pb, 2, 2); /* motion_type: frame */
+ s->qscale -= s->dquant;
+ }
+ s->misc_bits += get_bits_diff(s);
+ if (s->mv_dir&MV_DIR_FORWARD){
+ mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
+ mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+ s->last_mv[0][0][0]=s->last_mv[0][1][0]= s->mv[0][0][0];
+ s->last_mv[0][0][1]=s->last_mv[0][1][1]= s->mv[0][0][1];
+ s->f_count++;
+ }
+ if (s->mv_dir&MV_DIR_BACKWARD){
+ mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
+ mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+ s->last_mv[1][0][0]=s->last_mv[1][1][0]= s->mv[1][0][0];
+ s->last_mv[1][0][1]=s->last_mv[1][1][1]= s->mv[1][0][1];
+ s->b_count++;
+ }
+ }else{
+ assert(s->mv_type == MV_TYPE_FIELD);
+ assert(!s->frame_pred_frame_dct);
+ if (cbp){ // With coded block pattern
+ if (s->dquant) {
+ if(s->mv_dir == MV_DIR_FORWARD)
+ put_mb_modes(s, 6, 3, 1, 1);
+ else
+ put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 1);
+ put_bits(&s->pb, 5, s->qscale);
+ } else {
+ put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 1);
}
- // End of bloc from RAL
+ }else{ // No coded block pattern
+ put_bits(&s->pb, mb_type_len[s->mv_dir], 2);
+ put_bits(&s->pb, 2, 1); /* motion_type: field */
+ s->qscale -= s->dquant;
+ }
+ s->misc_bits += get_bits_diff(s);
+ if (s->mv_dir&MV_DIR_FORWARD){
+ for(i=0; i<2; i++){
+ put_bits(&s->pb, 1, s->field_select[0][i]);
+ mpeg1_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code);
+ mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code);
+ s->last_mv[0][i][0]= s->mv[0][i][0];
+ s->last_mv[0][i][1]= 2*s->mv[0][i][1];
+ }
+ s->f_count++;
+ }
+ if (s->mv_dir&MV_DIR_BACKWARD){
+ for(i=0; i<2; i++){
+ put_bits(&s->pb, 1, s->field_select[1][i]);
+ mpeg1_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0] , s->b_code);
+ mpeg1_encode_motion(s, s->mv[1][i][1] - (s->last_mv[1][i][1]>>1), s->b_code);
+ s->last_mv[1][i][0]= s->mv[1][i][0];
+ s->last_mv[1][i][1]= 2*s->mv[1][i][1];
+ }
+ s->b_count++;
+ }
}
+ s->mv_bits += get_bits_diff(s);
+ if(cbp)
+ put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]);
+ }
for(i=0;i<6;i++) {
if (cbp & (1 << (5 - i))) {
mpeg1_encode_block(s, block[i], i);
@@ -676,18 +700,6 @@ void mpeg1_encode_mb(MpegEncContext *s,
else
s->p_tex_bits+= get_bits_diff(s);
}
-
- // RAL: By this:
- if (s->mv_dir & MV_DIR_FORWARD)
- {
- s->last_mv[0][0][0]= s->mv[0][0][0];
- s->last_mv[0][0][1]= s->mv[0][0][1];
- }
- if (s->mv_dir & MV_DIR_BACKWARD)
- {
- s->last_mv[1][0][0]= s->mv[1][0][0];
- s->last_mv[1][0][1]= s->mv[1][0][1];
- }
}
// RAL: Parameter added: f_or_b_code
@@ -1952,7 +1964,7 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
s->repeat_first_field = get_bits1(&s->gb);
s->chroma_420_type = get_bits1(&s->gb);
s->progressive_frame = get_bits1(&s->gb);
-
+
if(s->picture_structure == PICT_FRAME)
s->first_field=0;
else{
@@ -1963,13 +1975,9 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
if(s->alternate_scan){
ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan);
ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
}else{
ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct);
ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
}
/* composite display not parsed */
@@ -2103,10 +2111,10 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
s->qscale = get_qscale(s);
if (s->first_slice && (s->first_field || s->picture_structure==PICT_FRAME)) {
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
- av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n",
+ av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n",
s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1],
s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")),
- s->progressive_sequence ? "pro" :"", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"",
+ s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"",
s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors,
s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :"");
}
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 1b3be9dbc2..bbb427b558 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -252,8 +252,13 @@ int DCT_common_init(MpegEncContext *s)
/* load & permutate scantables
note: only wmv uses differnt ones
*/
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct);
+ if(s->alternate_scan){
+ ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan);
+ ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan);
+ }else{
+ ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct);
+ ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct);
+ }
ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
@@ -394,7 +399,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){
/* init common structure for both encoder and decoder */
int MPV_common_init(MpegEncContext *s)
{
- int y_size, c_size, yc_size, i, mb_array_size, x, y;
+ int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
dsputil_init(&s->dsp, s->avctx);
DCT_common_init(s);
@@ -407,6 +412,7 @@ int MPV_common_init(MpegEncContext *s)
s->b8_stride = s->mb_width*2 + 1;
s->b4_stride = s->mb_width*4 + 1;
mb_array_size= s->mb_height * s->mb_stride;
+ mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
/* set default edge pos, will be overriden in decode_header if needed */
s->h_edge_pos= s->mb_width*16;
@@ -458,8 +464,6 @@ int MPV_common_init(MpegEncContext *s)
s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
if (s->encoding) {
- int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
-
/* Allocate MV tables */
CHECKED_ALLOCZ(s->p_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
CHECKED_ALLOCZ(s->b_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
@@ -491,7 +495,7 @@ int MPV_common_init(MpegEncContext *s)
CHECKED_ALLOCZ(s->avctx->stats_out, 256);
/* Allocate MB type table */
- CHECKED_ALLOCZ(s->mb_type , mb_array_size * sizeof(uint8_t)) //needed for encoding
+ CHECKED_ALLOCZ(s->mb_type , mb_array_size * sizeof(uint16_t)) //needed for encoding
CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
@@ -513,10 +517,21 @@ int MPV_common_init(MpegEncContext *s)
CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
- if(s->codec_id==CODEC_ID_MPEG4){
+ if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
/* interlaced direct mode decoding tables */
- CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
- CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
+ for(i=0; i<2; i++){
+ int j, k;
+ for(j=0; j<2; j++){
+ for(k=0; k<2; k++){
+ CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k] , mv_table_size * 2 * sizeof(int16_t))
+ s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1;
+ }
+ CHECKED_ALLOCZ(s->b_field_select_table[i][j] , mb_array_size * 2 * sizeof(uint8_t))
+ CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j] , mv_table_size * 2 * sizeof(int16_t))
+ s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j] + s->mb_stride + 1;
+ }
+ CHECKED_ALLOCZ(s->p_field_select_table[i] , mb_array_size * 2 * sizeof(uint8_t))
+ }
}
if (s->out_format == FMT_H263) {
/* ac values */
@@ -583,7 +598,7 @@ int MPV_common_init(MpegEncContext *s)
/* init common structure for both encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
- int i;
+ int i, j, k;
av_freep(&s->parse_context.buffer);
s->parse_context.buffer_size=0;
@@ -601,6 +616,18 @@ void MPV_common_end(MpegEncContext *s)
s->b_bidir_forw_mv_table= NULL;
s->b_bidir_back_mv_table= NULL;
s->b_direct_mv_table= NULL;
+ for(i=0; i<2; i++){
+ for(j=0; j<2; j++){
+ for(k=0; k<2; k++){
+ av_freep(&s->b_field_mv_table_base[i][j][k]);
+ s->b_field_mv_table[i][j][k]=NULL;
+ }
+ av_freep(&s->b_field_select_table[i][j]);
+ av_freep(&s->p_field_mv_table_base[i][j]);
+ s->p_field_mv_table[i][j]=NULL;
+ }
+ av_freep(&s->p_field_select_table[i]);
+ }
av_freep(&s->dc_val[0]);
av_freep(&s->ac_val[0]);
@@ -618,8 +645,6 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->tex_pb_buffer);
av_freep(&s->pb2_buffer);
av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
- av_freep(&s->field_mv_table);
- av_freep(&s->field_select_table);
av_freep(&s->avctx->stats_out);
av_freep(&s->ac_stats);
av_freep(&s->error_status_table);
@@ -692,7 +717,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->me_method = avctx->me_method;
/* Fixed QSCALE */
- s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
+ s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
s->adaptive_quant= ( s->avctx->lumi_masking
|| s->avctx->dark_masking
@@ -702,8 +727,9 @@ int MPV_encode_init(AVCodecContext *avctx)
|| (s->flags&CODEC_FLAG_QP_RD))
&& !s->fixed_qscale;
- s->obmc= (s->flags & CODEC_FLAG_OBMC);
- s->loop_filter= (s->flags & CODEC_FLAG_LOOP_FILTER);
+ s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
+ s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
+ s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
&& s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
@@ -934,7 +960,7 @@ int MPV_encode_init(AVCodecContext *avctx)
if(s->modified_quant)
s->chroma_qscale_table= ff_h263_chroma_qscale_table;
s->progressive_frame=
- s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
+ s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
ff_init_me(s);
@@ -1610,7 +1636,7 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int st
for(y=0; y<h; y+=16){
for(x=0; x<w; x+=16){
int offset= x + y*stride;
- int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
+ int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
int sae = get_sae(src + offset, mean, stride);
@@ -1906,7 +1932,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate){
int vbv_delay;
- assert(s->repeat_first_field==0 && s->avctx->repeat_pic==0);
+ assert(s->repeat_first_field==0);
vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
assert(vbv_delay < 0xFFFF);
@@ -3300,7 +3326,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
if(s->flags&CODEC_FLAG_INTERLACED_DCT){
int progressive_score, interlaced_score;
-
+
progressive_score= pix_vcmp16x8(ptr, wrap_y ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y , wrap_y*2);
@@ -3417,12 +3443,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
/* pre quantization */
if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
//FIXME optimize
- if(s->dsp.pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
- if(s->dsp.pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
- if(s->dsp.pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
- if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
- if(s->dsp.pix_abs8x8(ptr_cb , dest_cb , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
- if(s->dsp.pix_abs8x8(ptr_cr , dest_cr , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
+ if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
+ if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
+ if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
+ if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
+ if(s->dsp.sad[1](NULL, ptr_cb , dest_cb , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
+ if(s->dsp.sad[1](NULL, ptr_cr , dest_cr , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
#if 0
{
static int stat[7];
@@ -3484,6 +3510,19 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
}
+ //non c quantize code returns incorrect block_last_index FIXME
+ if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
+ for(i=0; i<6; i++){
+ int j;
+ if(s->block_last_index[i]>0){
+ for(j=63; j>0; j--){
+ if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
+ }
+ s->block_last_index[i]= j;
+ }
+ }
+ }
+
/* huffman encode */
switch(s->codec_id){ //FIXME funct ptr could be slightly faster
case CODEC_ID_MPEG1VIDEO:
@@ -3724,9 +3763,9 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in
int x,y;
if(w==16 && h==16)
- return s->dsp.sse[0](NULL, src1, src2, stride);
+ return s->dsp.sse[0](NULL, src1, src2, stride, 16);
else if(w==8 && h==8)
- return s->dsp.sse[1](NULL, src1, src2, stride);
+ return s->dsp.sse[1](NULL, src1, src2, stride, 8);
for(y=0; y<h; y++){
for(x=0; x<w; x++){
@@ -3747,9 +3786,9 @@ static int sse_mb(MpegEncContext *s){
if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
if(w==16 && h==16)
- return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
- +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
- +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
+ return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
+ +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
+ +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
else
return sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
+sse(s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
@@ -3759,7 +3798,7 @@ static int sse_mb(MpegEncContext *s){
static void encode_picture(MpegEncContext *s, int picture_number)
{
int mb_x, mb_y, pdif = 0;
- int i;
+ int i, j;
int bits;
MpegEncContext best_s, backup_s;
uint8_t bit_buf[2][3000];
@@ -3843,7 +3882,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
//FIXME do we need to zero them?
memset(s->current_picture.motion_val[0][0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
memset(s->p_mv_table , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
- memset(s->mb_type , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
+ for(i=0; i<s->mb_stride*s->mb_height; i++)
+ s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
if(!s->fixed_qscale){
/* finding spatial complexity for I-frame rate control */
@@ -3868,32 +3908,61 @@ static void encode_picture(MpegEncContext *s, int picture_number)
if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
s->pict_type= I_TYPE;
- memset(s->mb_type , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
+ for(i=0; i<s->mb_stride*s->mb_height; i++)
+ s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
}
if(!s->umvplus){
if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
- s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
-
+ s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
+
+ if(s->flags & CODEC_FLAG_INTERLACED_ME){
+ int a,b;
+ a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
+ b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
+ s->f_code= FFMAX(s->f_code, FFMAX(a,b));
+ }
+
ff_fix_long_p_mvs(s);
+ ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
+ if(s->flags & CODEC_FLAG_INTERLACED_ME){
+ for(i=0; i<2; i++){
+ for(j=0; j<2; j++)
+ ff_fix_long_mvs(s, s->p_field_select_table[i], j,
+ s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
+ }
+ }
}
if(s->pict_type==B_TYPE){
int a, b;
- a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
- b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
+ a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
+ b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
s->f_code = FFMAX(a, b);
- a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
- b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
+ a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
+ b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
s->b_code = FFMAX(a, b);
- ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
- ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
- ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
- ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
+ ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
+ ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
+ ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
+ ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
+ if(s->flags & CODEC_FLAG_INTERLACED_ME){
+ int dir;
+ for(dir=0; dir<2; dir++){
+ for(i=0; i<2; i++){
+ for(j=0; j<2; j++){
+ int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
+ : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
+ ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
+ s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
+ }
+ }
+ }
+ }
}
}
@@ -3990,10 +4059,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->current_picture_ptr->error[i] = 0;
}
s->mb_skip_run = 0;
- s->last_mv[0][0][0] = 0;
- s->last_mv[0][0][1] = 0;
- s->last_mv[1][0][0] = 0;
- s->last_mv[1][0][1] = 0;
+ memset(s->last_mv, 0, sizeof(s->last_mv));
s->last_mv_dir = 0;
@@ -4027,6 +4093,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
int mb_type= s->mb_type[xy];
// int d;
int dmin= INT_MAX;
+ int dir;
s->mb_x = mb_x;
ff_update_block_index(s);
@@ -4134,25 +4201,37 @@ static void encode_picture(MpegEncContext *s, int picture_number)
backup_s.tex_pb= s->tex_pb;
}
- if(mb_type&MB_TYPE_INTER){
+ if(mb_type&CANDIDATE_MB_TYPE_INTER){
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 0;
s->mv[0][0][0] = s->p_mv_table[xy][0];
s->mv[0][0][1] = s->p_mv_table[xy][1];
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
&dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
}
- if(mb_type&MB_TYPE_SKIPED){
+ if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(i=0; i<2; i++){
+ j= s->field_select[0][i] = s->p_field_select_table[i][xy];
+ s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
+ s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
+ }
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 0;
s->mv[0][0][0] = 0;
s->mv[0][0][1] = 0;
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_SKIPED, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb,
&dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
}
- if(mb_type&MB_TYPE_INTER4V){
+ if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_8X8;
s->mb_intra= 0;
@@ -4160,28 +4239,28 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
}
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
&dmin, &next_block, 0, 0);
}
- if(mb_type&MB_TYPE_FORWARD){
+ if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 0;
s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
&dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
}
- if(mb_type&MB_TYPE_BACKWARD){
+ if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
s->mv_dir = MV_DIR_BACKWARD;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 0;
s->mv[1][0][0] = s->b_back_mv_table[xy][0];
s->mv[1][0][1] = s->b_back_mv_table[xy][1];
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
&dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
}
- if(mb_type&MB_TYPE_BIDIR){
+ if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 0;
@@ -4189,10 +4268,10 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
&dmin, &next_block, 0, 0);
}
- if(mb_type&MB_TYPE_DIRECT){
+ if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
int mx= s->b_direct_mv_table[xy][0];
int my= s->b_direct_mv_table[xy][1];
@@ -4201,16 +4280,54 @@ static void encode_picture(MpegEncContext *s, int picture_number)
#ifdef CONFIG_RISKY
ff_mpeg4_set_direct_mv(s, mx, my);
#endif
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
&dmin, &next_block, mx, my);
}
- if(mb_type&MB_TYPE_INTRA){
+ if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(i=0; i<2; i++){
+ j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
+ s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
+ s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
+ }
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
+ s->mv_dir = MV_DIR_BACKWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(i=0; i<2; i++){
+ j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
+ s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
+ s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
+ }
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(dir=0; dir<2; dir++){
+ for(i=0; i<2; i++){
+ j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
+ s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
+ s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
+ }
+ }
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
+ &dmin, &next_block, 0, 0);
+ }
+ if(mb_type&CANDIDATE_MB_TYPE_INTRA){
s->mv_dir = 0;
s->mv_type = MV_TYPE_16X16;
s->mb_intra= 1;
s->mv[0][0][0] = 0;
s->mv[0][0][1] = 0;
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
&dmin, &next_block, 0, 0);
if(s->h263_pred || s->h263_aic){
if(best_s.mb_intra)
@@ -4252,7 +4369,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
}
}
- encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
+ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
&dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
if(best_s.qscale != qp){
if(s->mb_intra){
@@ -4312,19 +4429,30 @@ static void encode_picture(MpegEncContext *s, int picture_number)
// only one MB-Type possible
switch(mb_type){
- case MB_TYPE_INTRA:
+ case CANDIDATE_MB_TYPE_INTRA:
s->mv_dir = 0;
s->mb_intra= 1;
motion_x= s->mv[0][0][0] = 0;
motion_y= s->mv[0][0][1] = 0;
break;
- case MB_TYPE_INTER:
+ case CANDIDATE_MB_TYPE_INTER:
s->mv_dir = MV_DIR_FORWARD;
s->mb_intra= 0;
motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
break;
- case MB_TYPE_INTER4V:
+ case CANDIDATE_MB_TYPE_INTER_I:
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(i=0; i<2; i++){
+ j= s->field_select[0][i] = s->p_field_select_table[i][xy];
+ s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
+ s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
+ }
+ motion_x = motion_y = 0;
+ break;
+ case CANDIDATE_MB_TYPE_INTER4V:
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_8X8;
s->mb_intra= 0;
@@ -4334,7 +4462,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
}
motion_x= motion_y= 0;
break;
- case MB_TYPE_DIRECT:
+ case CANDIDATE_MB_TYPE_DIRECT:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
s->mb_intra= 0;
motion_x=s->b_direct_mv_table[xy][0];
@@ -4343,7 +4471,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
#endif
break;
- case MB_TYPE_BIDIR:
+ case CANDIDATE_MB_TYPE_BIDIR:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
s->mb_intra= 0;
motion_x=0;
@@ -4353,19 +4481,54 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
break;
- case MB_TYPE_BACKWARD:
+ case CANDIDATE_MB_TYPE_BACKWARD:
s->mv_dir = MV_DIR_BACKWARD;
s->mb_intra= 0;
motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
break;
- case MB_TYPE_FORWARD:
+ case CANDIDATE_MB_TYPE_FORWARD:
s->mv_dir = MV_DIR_FORWARD;
s->mb_intra= 0;
motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
// printf(" %d %d ", motion_x, motion_y);
break;
+ case CANDIDATE_MB_TYPE_FORWARD_I:
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(i=0; i<2; i++){
+ j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
+ s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
+ s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
+ }
+ motion_x=motion_y=0;
+ break;
+ case CANDIDATE_MB_TYPE_BACKWARD_I:
+ s->mv_dir = MV_DIR_BACKWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(i=0; i<2; i++){
+ j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
+ s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
+ s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
+ }
+ motion_x=motion_y=0;
+ break;
+ case CANDIDATE_MB_TYPE_BIDIR_I:
+ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+ s->mv_type = MV_TYPE_FIELD;
+ s->mb_intra= 0;
+ for(dir=0; dir<2; dir++){
+ for(i=0; i<2; i++){
+ j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
+ s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
+ s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
+ }
+ }
+ motion_x=motion_y=0;
+ break;
default:
motion_x=motion_y=0; //gcc warning fix
av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index cf2b5cf258..4bd99e8e92 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -137,6 +137,7 @@ typedef struct Picture{
int16_t (*motion_val_base[2])[2];
int8_t *ref_index[2];
uint32_t *mb_type_base;
+#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 //default mb_type if theres just one type
#define IS_INTRA4x4(a) ((a)&MB_TYPE_INTRA4x4)
#define IS_INTRA16x16(a) ((a)&MB_TYPE_INTRA16x16)
#define IS_PCM(a) ((a)&MB_TYPE_INTRA_PCM)
@@ -206,23 +207,28 @@ typedef struct MotionEstContext{
int mb_penalty_factor;
int pre_pass; ///< = 1 for the pre pass
int dia_size;
+ int xmin;
+ int xmax;
+ int ymin;
+ int ymax;
uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV
int (*sub_motion_search)(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, Picture *ref_picture,
- int n, int size, uint8_t * const mv_penalty);
- int (*motion_search[7])(struct MpegEncContext * s, int block,
+ int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[6], int stride, int uvstride,
+ int size, int h, uint8_t * const mv_penalty);
+ int (*motion_search[7])(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr,
- int P[10][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2],
+ int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2],
int ref_mv_scale, uint8_t * const mv_penalty);
- int (*pre_motion_search)(struct MpegEncContext * s, int block,
+ int (*pre_motion_search)(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr,
- int P[10][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2],
+ int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2],
int ref_mv_scale, uint8_t * const mv_penalty);
- int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture,
+ int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3],
+ uint8_t *ref_data[6], int stride, int uvstride,
uint8_t * const mv_penalty);
}MotionEstContext;
@@ -351,12 +357,18 @@ typedef struct MpegEncContext {
int16_t (*b_bidir_forw_mv_table_base)[2];
int16_t (*b_bidir_back_mv_table_base)[2];
int16_t (*b_direct_mv_table_base)[2];
+ int16_t (*p_field_mv_table_base[2][2])[2];
+ int16_t (*b_field_mv_table_base[2][2][2])[2];
int16_t (*p_mv_table)[2]; ///< MV table (1MV per MB) p-frame encoding
int16_t (*b_forw_mv_table)[2]; ///< MV table (1MV per MB) forward mode b-frame encoding
int16_t (*b_back_mv_table)[2]; ///< MV table (1MV per MB) backward mode b-frame encoding
int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding
int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding
int16_t (*b_direct_mv_table)[2]; ///< MV table (1MV per MB) direct mode b-frame encoding
+ int16_t (*p_field_mv_table[2][2])[2]; ///< MV table (2MV per MB) interlaced p-frame encoding
+ int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced b-frame encoding
+ uint8_t (*p_field_select_table[2]);
+ uint8_t (*b_field_select_table[2][2]);
int me_method; ///< ME algorithm
int scene_change_score;
int mv_dir;
@@ -391,17 +403,22 @@ typedef struct MpegEncContext {
int mb_x, mb_y;
int mb_skip_run;
int mb_intra;
- uint8_t *mb_type; ///< Table for MB type FIXME remove and use picture->mb_type
-#define MB_TYPE_INTRA 0x01
-#define MB_TYPE_INTER 0x02
-#define MB_TYPE_INTER4V 0x04
-#define MB_TYPE_SKIPED 0x08
+ uint16_t *mb_type; ///< Table for candidate MB types for encoding
+#define CANDIDATE_MB_TYPE_INTRA 0x01
+#define CANDIDATE_MB_TYPE_INTER 0x02
+#define CANDIDATE_MB_TYPE_INTER4V 0x04
+#define CANDIDATE_MB_TYPE_SKIPED 0x08
//#define MB_TYPE_GMC 0x10
-#define MB_TYPE_DIRECT 0x10
-#define MB_TYPE_FORWARD 0x20
-#define MB_TYPE_BACKWARD 0x40
-#define MB_TYPE_BIDIR 0x80
+#define CANDIDATE_MB_TYPE_DIRECT 0x10
+#define CANDIDATE_MB_TYPE_FORWARD 0x20
+#define CANDIDATE_MB_TYPE_BACKWARD 0x40
+#define CANDIDATE_MB_TYPE_BIDIR 0x80
+
+#define CANDIDATE_MB_TYPE_INTER_I 0x100
+#define CANDIDATE_MB_TYPE_FORWARD_I 0x200
+#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400
+#define CANDIDATE_MB_TYPE_BIDIR_I 0x800
int block_index[6]; ///< index to current MB in block based arrays with edges
int block_wrap[6];
@@ -551,8 +568,6 @@ typedef struct MpegEncContext {
uint8_t *tex_pb_buffer;
uint8_t *pb2_buffer;
int mpeg_quant;
- int16_t (*field_mv_table)[2][2]; ///< used for interlaced b frame decoding
- int8_t (*field_select_table)[2]; ///< wtf, no really another table for interlaced b frames
int t_frame; ///< time distance of first I -> B, used for interlaced b frames
int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG4
@@ -748,7 +763,8 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
int mb_x, int mb_y);
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
void ff_fix_long_p_mvs(MpegEncContext * s);
-void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
+void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
+ int16_t (*mv_table)[2], int f_code, int type, int truncate);
void ff_init_me(MpegEncContext *s);
int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y);
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 6354807844..d1a2943656 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -45,7 +45,7 @@ static void sigill_handler (int sig)
}
#endif /* CONFIG_DARWIN */
-int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
+int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int i;
int s __attribute__((aligned(16)));
@@ -57,7 +57,7 @@ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
s = 0;
sad = (vector unsigned int)vec_splat_u32(0);
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
/*
Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15]
@@ -92,7 +92,7 @@ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
+int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int i;
int s __attribute__((aligned(16)));
@@ -118,7 +118,7 @@ int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
tv = (vector unsigned char *) &pix2[0];
pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
/*
Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15]
@@ -152,7 +152,7 @@ int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
+int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int i;
int s __attribute__((aligned(16)));
@@ -194,7 +194,7 @@ int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
t1 = vec_add(pix2hv, pix2ihv);
t2 = vec_add(pix2lv, pix2ilv);
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
/*
Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15]
@@ -253,7 +253,7 @@ int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
+int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int i;
int s __attribute__((aligned(16)));
@@ -266,7 +266,7 @@ int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
sad = (vector unsigned int)vec_splat_u32(0);
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2 */
perm1 = vec_lvsl(0, pix1);
pix1v = (vector unsigned char *) pix1;
@@ -295,7 +295,7 @@ int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
return s;
}
-int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
+int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int i;
int s __attribute__((aligned(16)));
@@ -309,7 +309,7 @@ int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
- for(i=0;i<8;i++) {
+ for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */
@@ -374,9 +374,9 @@ int pix_norm1_altivec(uint8_t *pix, int line_size)
/**
* Sum of Squared Errors for a 8x8 block.
* AltiVec-enhanced.
- * It's the pix_abs8x8_altivec code above w/ squaring added.
+ * It's the sad8_altivec code above w/ squaring added.
*/
-int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
+int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int i;
int s __attribute__((aligned(16)));
@@ -391,7 +391,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
- for(i=0;i<8;i++) {
+ for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */
@@ -430,9 +430,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
/**
* Sum of Squared Errors for a 16x16 block.
* AltiVec-enhanced.
- * It's the pix_abs16x16_altivec code above w/ squaring added.
+ * It's the sad16_altivec code above w/ squaring added.
*/
-int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
+int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int i;
int s __attribute__((aligned(16)));
@@ -444,7 +444,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
sum = (vector unsigned int)vec_splat_u32(0);
- for(i=0;i<16;i++) {
+ for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2 */
perm1 = vec_lvsl(0, pix1);
pix1v = (vector unsigned char *) pix1;
@@ -609,14 +609,6 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
}
}
-int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride) {
- return pix_abs16x16_altivec(a,b,stride);
-}
-
-int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride) {
- return pix_abs8x8_altivec(a,b,stride);
-}
-
void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int i;
diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h
index f04496d629..93448a1ad7 100644
--- a/libavcodec/ppc/dsputil_altivec.h
+++ b/libavcodec/ppc/dsputil_altivec.h
@@ -24,16 +24,14 @@
#ifdef HAVE_ALTIVEC
-extern int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
-extern int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
-extern int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
-extern int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
-extern int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
-extern int sad16x16_altivec(void *s, uint8_t *a, uint8_t *b, int stride);
-extern int sad8x8_altivec(void *s, uint8_t *a, uint8_t *b, int stride);
+extern int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+extern int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+extern int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+extern int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+extern int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int pix_norm1_altivec(uint8_t *pix, int line_size);
-extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size);
-extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size);
+extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int pix_sum_altivec(uint8_t * pix, int line_size);
extern void diff_pixels_altivec(DCTELEM* block, const uint8_t* s1, const uint8_t* s2, int stride);
extern void get_pixels_altivec(DCTELEM* block, const uint8_t * pixels, int line_size);
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 4d45b4ffcc..9882e401f9 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -240,13 +240,13 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
mm_flags |= MM_ALTIVEC;
// Altivec specific optimisations
- c->pix_abs16x16_x2 = pix_abs16x16_x2_altivec;
- c->pix_abs16x16_y2 = pix_abs16x16_y2_altivec;
- c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec;
- c->pix_abs16x16 = pix_abs16x16_altivec;
- c->pix_abs8x8 = pix_abs8x8_altivec;
- c->sad[0]= sad16x16_altivec;
- c->sad[1]= sad8x8_altivec;
+ c->pix_abs[0][1] = sad16_x2_altivec;
+ c->pix_abs[0][2] = sad16_y2_altivec;
+ c->pix_abs[0][3] = sad16_xy2_altivec;
+ c->pix_abs[0][0] = sad16_altivec;
+ c->pix_abs[1][0] = sad8_altivec;
+ c->sad[0]= sad16_altivec;
+ c->sad[1]= sad8_altivec;
c->pix_norm1 = pix_norm1_altivec;
c->sse[1]= sse8_altivec;
c->sse[0]= sse16_altivec;
diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c
index 955290e44b..53d90056c3 100644
--- a/libavcodec/ratecontrol.c
+++ b/libavcodec/ratecontrol.c
@@ -520,7 +520,7 @@ static void adaptive_quantization(MpegEncContext *s, double q){
if(spat_cplx < 4) spat_cplx= 4; //FIXME finetune
if(temp_cplx < 4) temp_cplx= 4; //FIXME finetune
- if((s->mb_type[mb_xy]&MB_TYPE_INTRA)){//FIXME hq mode
+ if((s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTRA)){//FIXME hq mode
cplx= spat_cplx;
factor= 1.0 + p_masking;
}else{
diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref
index 08afa61a84..4ec3c8f519 100644
--- a/tests/ffmpeg.regression.ref
+++ b/tests/ffmpeg.regression.ref
@@ -26,21 +26,21 @@ stddev: 8.18 bytes:7602176
920a0a8a0063655d1f34dcaad7857f98 *./data/a-h263p.avi
0eb167c9dfcbeeecbf3debed8af8f811 *./data/out.yuv
stddev: 2.08 bytes:7602176
-a8cc41cd5016bbb821e7c2691f5090ea *./data/a-odivx.mp4
-e48114a50ef4cfb4fe2016fa5b34ae4c *./data/out.yuv
-stddev: 8.02 bytes:7602176
+66f8b4b5b4f0655cff7bdbc44969cab3 *./data/a-odivx.mp4
+5bd332c77ef45e58b7017e06a0467dd3 *./data/out.yuv
+stddev: 7.94 bytes:7602176
5704a082cc5c5970620123ae20566286 *./data/a-huffyuv.avi
799d3db687f6cdd7a837ec156efc171f *./data/out.yuv
stddev: 0.00 bytes:7602176
e9f63126859b97cd23cd1413038f8f7b *./data/a-mpeg4-rc.avi
90a159074b1b109569914ee63f387860 *./data/out.yuv
stddev: 10.18 bytes:7145472
-b3f1425e266569d5d726b88eadc13dd4 *./data/a-mpeg4-adv.avi
-fb61365b22c947adbaeab74478579020 *./data/out.yuv
-stddev: 7.31 bytes:7602176
-25ec5ab399fd4db0c8aaea78cb692611 *./data/a-error-mpeg4-adv.avi
-bd441fc1e2fb9a3c0bdc9c5f1ed25ef0 *./data/out.yuv
-stddev: 13.57 bytes:7602176
+d7d295f97a1e07b633f973d2325880ce *./data/a-mpeg4-adv.avi
+612f79510c8098f1421aa154047e2bf2 *./data/out.yuv
+stddev: 7.25 bytes:7602176
+f863f4198521bd76930ea33991b47273 *./data/a-error-mpeg4-adv.avi
+ba7fcd126c7c9fead5a5de71aaaf0624 *./data/out.yuv
+stddev: 16.80 bytes:7602176
328ebd044362116e274739e23c482ee7 *./data/a-mpeg1b.mpg
788a9d500dc8986231a18076fc80fd73 *./data/out.yuv
stddev: 10.07 bytes:7145472
diff --git a/tests/regression.sh b/tests/regression.sh
index 0a895f6f45..714613a1a4 100755
--- a/tests/regression.sh
+++ b/tests/regression.sh
@@ -138,7 +138,7 @@ do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
# mpeg2 encoding interlaced
file=${outfile}mpeg2i.mpg
-do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -interlace $file
+do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -ildct $file
# mpeg2 decoding
do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst
diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref
index 66bcf4e318..3277747836 100644
--- a/tests/rotozoom.regression.ref
+++ b/tests/rotozoom.regression.ref
@@ -26,21 +26,21 @@ stddev: 5.41 bytes:7602176
f7828488c31ccb6787367ef4e4a2ad42 *./data/a-h263p.avi
7d39d1f272205a6a231d0e0baf32ff9d *./data/out.yuv
stddev: 1.91 bytes:7602176
-f17dc7346f5d1d4307ecf4507f10fcc6 *./data/a-odivx.mp4
-ff7ddb57d9038b94f08c43bae7e1329f *./data/out.yuv
-stddev: 5.28 bytes:7602176
+a831828595e5764e6ee30c2d9e548385 *./data/a-odivx.mp4
+ad75d173bd30d642147f00da21df0012 *./data/out.yuv
+stddev: 5.27 bytes:7602176
242a7a18c2793e115007bc163861ef4e *./data/a-huffyuv.avi
dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv
stddev: 0.00 bytes:7602176
6a469f42ce6946dd4c708f9e51e3da6a *./data/a-mpeg4-rc.avi
df9de7134d961119705b4e0cabca1f12 *./data/out.yuv
stddev: 4.20 bytes:7145472
-742ffadf3c309d2c4ac888a6a0905bf9 *./data/a-mpeg4-adv.avi
-b02f71e91e9368ce94814ab3d74f91ba *./data/out.yuv
-stddev: 4.97 bytes:7602176
-f2888ab759ac28aba85a16d3d54b80d0 *./data/a-error-mpeg4-adv.avi
-93ab926aad2e658a5bb00c25b7cefdab *./data/out.yuv
-stddev: 5.22 bytes:7602176
+483504d060b0bd8ac1acfa3a823c2ad7 *./data/a-mpeg4-adv.avi
+08d24bdd7da80cffaf8abaa3e71b1843 *./data/out.yuv
+stddev: 4.96 bytes:7602176
+03ff35856faefb4882eaf4d86d95bea7 *./data/a-error-mpeg4-adv.avi
+8550acff0851ee915bd5800f1e20f37c *./data/out.yuv
+stddev: 9.66 bytes:7602176
671802a2c5078e69f7f422765ea87f2a *./data/a-mpeg1b.mpg
d3d5876cef34b728602d5a22eee9249f *./data/out.yuv
stddev: 5.93 bytes:7145472