4 files changed, 145 insertions, 21 deletions
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index bec29d5560..ad19cdcd55 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -15,7 +15,7 @@ extern "C" {
 
 #define FFMPEG_VERSION_INT     0x000408
 #define FFMPEG_VERSION         "0.4.8"
-#define LIBAVCODEC_BUILD       4680
+#define LIBAVCODEC_BUILD       4681
 
 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
 #define LIBAVCODEC_VERSION     FFMPEG_VERSION
@@ -230,6 +230,7 @@ static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG,
 /* Fx : Flag for h263+ extra options */
 #define CODEC_FLAG_H263P_AIC      0x01000000 ///< Advanced intra coding 
 #define CODEC_FLAG_H263P_UMV      0x02000000 ///< Unlimited motion vector  
+#define CODEC_FLAG_CBP_RD         0x04000000 ///< use rate distortion optimization for cbp
 /* For advanced prediction mode, we reuse the 4MV flag */
 /* Unsupported options :
  * 		Syntax Arithmetic coding (SAC)
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index c74bee8752..845b03e600 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -29,6 +29,8 @@
  */
  
 //#define DEBUG
+#include <limits.h>
+
 #include "common.h"
 #include "dsputil.h"
 #include "avcodec.h"
@@ -560,6 +562,106 @@ void ff_h263_update_motion_val(MpegEncContext * s){
 }
 
 #ifdef CONFIG_ENCODERS
+
+static inline int get_p_cbp(MpegEncContext * s,
+                      DCTELEM block[6][64],
+                      int motion_x, int motion_y){
+    int cbp, i;
+
+    if(s->flags & CODEC_FLAG_CBP_RD){
+        int best_cbpy_score= INT_MAX;
+        int best_cbpc_score= INT_MAX;
+        int cbpc, cbpy;
+        const int offset= (s->mv_type==MV_TYPE_16X16 ? 0 : 16) + (s->dquant ? 8 : 0);
+        const int lambda= (s->qscale*s->qscale*64*105 + 64)>>7; 
+
+        for(i=0; i<4; i++){
+            int score= inter_MCBPC_bits[i + offset] * lambda;
+            if(i&1) score += s->coded_score[5];
+            if(i&2) score += s->coded_score[4];
+
+            if(score < best_cbpc_score){
+                best_cbpc_score= score;
+                cbpc= i;
+            }
+        }
+
+        for(i=0; i<16; i++){
+            int score= cbpy_tab[i ^ 0xF][1] * lambda;
+            if(i&1) score += s->coded_score[3];
+            if(i&2) score += s->coded_score[2];
+            if(i&4) score += s->coded_score[1];
+            if(i&8) score += s->coded_score[0];
+
+            if(score < best_cbpy_score){
+                best_cbpy_score= score;
+                cbpy= i;
+            }
+        }
+        cbp= cbpc + 4*cbpy;
+        if ((motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16){
+            if(best_cbpy_score + best_cbpc_score + 2*lambda >= 0)
+                cbp= 0;
+        }
+
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
+                s->block_last_index[i]= -1;
+                memset(s->block[i], 0, sizeof(DCTELEM)*64);
+            }
+        }
+    }else{
+        cbp= 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+    }
+    return cbp;
+}
+
+static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
+                            int motion_x, int motion_y, int mb_type){
+    int cbp=0, i;
+
+    if(s->flags & CODEC_FLAG_CBP_RD){
+        int score=0;
+        const int lambda= (s->qscale*s->qscale*64*105 + 64)>>7; 
+        
+        for(i=0; i<6; i++){
+            if(s->coded_score[i] < 0){
+                score += s->coded_score[i];
+                cbp |= 1 << (5 - i);
+            }
+        }
+        
+        if(cbp){
+            int zero_score= -6;
+            if ((motion_x | motion_y | s->dquant | mb_type) == 0){
+                zero_score-= 4; //2*MV + mb_type + cbp bit
+            }
+
+            zero_score*= lambda;
+            if(zero_score <= score){
+                cbp=0;
+            }
+        }
+
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
+                s->block_last_index[i]= -1;
+                memset(s->block[i], 0, sizeof(DCTELEM)*64);
+            }
+        }
+    }else{
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+    }
+    return cbp;
+}
+
 void mpeg4_encode_mb(MpegEncContext * s,
 		    DCTELEM block[6][64],
 		    int motion_x, int motion_y)
@@ -574,12 +676,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
     //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         /* compute cbp */
-        int i, cbp = 0;
-        for (i = 0; i < 6; i++) {
-            if (s->block_last_index[i] >= 0)
-                cbp |= 1 << (5 - i);
-        }
-
+        int i, cbp;
+        
         if(s->pict_type==B_TYPE){
             static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
             int mb_type=  mb_type_table[s->mv_dir];
@@ -609,6 +707,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 return;
             }
             
+            cbp= get_b_cbp(s, block, motion_x, motion_y, mb_type);
+            
             if ((cbp | motion_x | motion_y | mb_type) ==0) {
                 /* direct MB with MV={0,0} */
                 assert(s->dquant==0);
@@ -699,6 +799,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->p_tex_bits+= get_bits_diff(s);
             }
         }else{ /* s->pict_type==B_TYPE */
+            cbp= get_p_cbp(s, block, motion_x, motion_y);
+        
             if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) {
                 /* check if the B frames can skip it too, as we must skip it if we skip here 
                    why didnt they just compress the skip-mb bits instead of reusing them ?! */
@@ -938,11 +1040,8 @@ void h263_encode_mb(MpegEncContext * s,
     //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         /* compute cbp */
-        cbp = 0;
-        for (i = 0; i < 6; i++) {
-            if (s->block_last_index[i] >= 0)
-                cbp |= 1 << (5 - i);
-        }
+        cbp= get_p_cbp(s, block, motion_x, motion_y);
+
         if ((cbp | motion_x | motion_y | s->dquant) == 0) {
             /* skip macroblock */
             put_bits(&s->pb, 1, 1);
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index fa915e366e..6387e18e37 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -632,6 +632,11 @@ int MPV_encode_init(AVCodecContext *avctx)
         return -1;
     }
         
+    if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
+        fprintf(stderr, "CBP RD needs trellis quant\n");
+        return -1;
+    }
+
     if(s->codec_id==CODEC_ID_MJPEG){
         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
         s->inter_quant_bias= 0;
@@ -3020,6 +3025,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             }else
                 s->block_last_index[i]= -1;
         }
+        if(s->flags & CODEC_FLAG_CBP_RD){
+            for(i=0;i<6;i++) {
+                if(s->block_last_index[i] == -1)
+                    s->coded_score[i]= INT_MAX/256;
+            }
+        }
+        
         if(s->luma_elim_threshold && !s->mb_intra)
             for(i=0; i<4; i++)
                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
@@ -3995,6 +4007,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     int last_level=0;
     int last_score= 0;
     int last_i= 0;
+    int not_coded_score= 0;
     int coeff[3][64];
     int coeff_count[64];
     int lambda, qmul, qadd, start_i, last_non_zero, i, dc;
@@ -4064,6 +4077,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
 //                coeff[2][k]= -level+2;
             }
             coeff_count[k]= FFMIN(level, 2);
+            assert(coeff_count[k]);
             max |=level;
             last_non_zero = i;
         }else{
@@ -4089,6 +4103,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
         int best_score=256*256*256*120;
 
         last_score += zero_distoration;
+        not_coded_score += zero_distoration;
         for(level_index=0; level_index < coeff_count[i]; level_index++){
             int distoration;
             int level= coeff[level_index][i];
@@ -4205,6 +4220,8 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             }
         }
     }
+
+    s->coded_score[n] = last_score - not_coded_score;
     
     dc= block[0];
     last_non_zero= last_i - 1 + start_i;
@@ -4212,13 +4229,13 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     
     if(last_non_zero < start_i)
         return last_non_zero;
-    
+
     if(last_non_zero == 0 && start_i == 0){
         int best_level= 0;
         int best_score= dc * dc;
-
+        
         for(i=0; i<coeff_count[0]; i++){
-            const int level= coeff[i][0];
+            int level= coeff[i][0];
             int unquant_coeff, score, distoration;
 
             if(s->out_format == FMT_H263){
@@ -4240,18 +4257,23 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             unquant_coeff<<= 3 + 3;
 
             distoration= (unquant_coeff - dc) * (unquant_coeff - dc);
-            score= distoration + last_length[UNI_AC_ENC_INDEX(0, level+64)]*lambda;
+            level+=64;
+            if((level&(~127)) == 0)
+                score= distoration + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
+            else
+                score= distoration + esc_length*lambda;
+
             if(score < best_score){
                 best_score= score;
-                best_level= level;
+                best_level= level - 64;
             }
         }
         block[0]= best_level;
-        if(best_level == 0) 
-            last_non_zero=-1;
-        return last_non_zero;
+        s->coded_score[n] = best_score - dc*dc;
+        if(best_level == 0) return -1;
+        else                return last_non_zero;
     }
-    
+
     i= last_i;
     assert(last_level);
 //FIXME use permutated scantable
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 58af196ea2..d46d4b6a97 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -453,6 +453,8 @@ typedef struct MpegEncContext {
     uint8_t *chroma_dc_vlc_length;
 #define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level))
 
+    int coded_score[6];
+
     /** precomputed matrix (combine qscale and DCT renorm) */
     int __align8 q_intra_matrix[32][64];
     int __align8 q_inter_matrix[32][64];