author    Michael Niedermayer <michaelni@gmx.at>  2011-07-10 04:28:50 +0200
committer Michael Niedermayer <michaelni@gmx.at>  2011-07-10 04:28:50 +0200
commit    2f56a97f2488146f209de415e3338f7e597c5719 (patch)
tree      dc5738f578f616d5af685391e82220452b3fbb3e /libavcodec/h264_mvpred.h
parent    a7d3a51dd1efa3073cc9d419a73f709f784ce267 (diff)
parent    7f7dc4fb55904e7b51b78ebca746c6755fc9770c (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master: (22 commits)
  H.264: fix filter_mb_fast with 4:4:4 + 8x8dct
  alsa: limit buffer_size to 32768 frames.
  alsa: fallback to buffer_size/4 for period_size.
  doc: replace @pxref by @ref where appropriate
  mpeg1video: don't abort if thread_count is too high.
  segafilm: add support for videos with cri adx adpcm
  gxf: Fix 25 fps DV material in GXF being misdetected as 50 fps
  libxvid: Add const qualifier to silence compiler warning.
  H.264: improve qp_thresh check
  H.264: use fill_rectangle in CABAC decoding
  H.264: Remove redundant hl_motion_16/8 code
  H.264: merge fill_rectangle into P-SKIP MV prediction, to match B-SKIP
  H.264: faster P-SKIP decoding
  H.264: av_always_inline some more functions
  H.264: Add x86 assembly for 10-bit H.264 predict functions
  swscale: rename uv_off/uv_off2 to uv_off_px/byte.
  swscale: implement error dithering in planarCopyWrapper.
  swscale: error dithering for 16/9/10-bit to 8-bit.
  swscale: fix overflow in 16-bit vertical scaling.
  swscale: fix crash in 8-bpc bilinear output without alpha.
  ...

Conflicts:
	doc/developer.texi
	libavdevice/alsa-audio.h
	libavformat/gxf.c
	libswscale/swscale.c
	libswscale/swscale_internal.h
	libswscale/swscale_unscaled.c
	libswscale/x86/swscale_template.c
	tests/ref/lavfi/pixdesc
	tests/ref/lavfi/pixfmts_copy
	tests/ref/lavfi/pixfmts_crop
	tests/ref/lavfi/pixfmts_hflip
	tests/ref/lavfi/pixfmts_null
	tests/ref/lavfi/pixfmts_scale
	tests/ref/lavfi/pixfmts_vflip

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/h264_mvpred.h')
-rw-r--r--  libavcodec/h264_mvpred.h | 123
 1 file changed, 109 insertions(+), 14 deletions(-)
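
For orientation before the diff: the headline change in this file replaces the old pred_pskip_motion(), which fell through to the generic pred_motion() call, with a self-contained version that reads the neighbours' reference indices and motion vectors directly from the current picture and writes the result with fill_rectangle(). The prediction rule itself is the standard H.264 median predictor. Below is a minimal standalone sketch of that rule, not the FFmpeg code itself: the function name pred_pskip_sketch is hypothetical, the ref arguments are assumed to already be 0 when the neighbour predicts from reference 0, and mid_pred mirrors FFmpeg's helper of the same name.

#include <stdint.h>

/* Median of three values, as in FFmpeg's mid_pred(). */
static int mid_pred(int a, int b, int c)
{
    if (a > b) {
        if (c > b) {
            if (c > a) b = a;
            else       b = c;
        }
    } else {
        if (b > c) {
            if (c > a) b = c;
            else       b = a;
        }
    }
    return b;
}

/* Hypothetical illustration of the P-SKIP rule: if exactly one of the
 * neighbours A (left), B (top), C (top-right) predicts from reference 0,
 * copy its MV; otherwise (0, 2 or 3 matches) take the component-wise
 * median. This matches the match_count branches in the diff below. */
static void pred_pskip_sketch(const int16_t A[2], const int16_t B[2],
                              const int16_t C[2],
                              int left_ref, int top_ref, int diagonal_ref,
                              int *mx, int *my)
{
    int match_count = !left_ref + !top_ref + !diagonal_ref;

    if (match_count == 1) {
        const int16_t *src = !left_ref ? A : !top_ref ? B : C;
        *mx = src[0];
        *my = src[1];
    } else {
        *mx = mid_pred(A[0], B[0], C[0]);
        *my = mid_pred(A[1], B[1], C[1]);
    }
}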
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index a0886d5d47..dc146a4803 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -35,7 +35,7 @@
//#undef NDEBUG
#include <assert.h>
-static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
+static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
MpegEncContext *s = &h->s;
@@ -92,7 +92,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
-static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
const int index8= scan8[n];
const int top_ref= h->ref_cache[list][ index8 - 8 ];
const int left_ref= h->ref_cache[list][ index8 - 1 ];
@@ -147,7 +147,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
-static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
if(n==0){
const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
@@ -182,7 +182,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
-static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
if(n==0){
const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
@@ -213,22 +213,117 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
pred_motion(h, n, 2, list, ref, mx, my);
}
-static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
- const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
- const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
+#define FIX_MV_MBAFF(type, refn, mvn, idx)\
+ if(FRAME_MBAFF){\
+ if(MB_FIELD){\
+ if(!IS_INTERLACED(type)){\
+ refn <<= 1;\
+ AV_COPY32(mvbuf[idx], mvn);\
+ mvbuf[idx][1] /= 2;\
+ mvn = mvbuf[idx];\
+ }\
+ }else{\
+ if(IS_INTERLACED(type)){\
+ refn >>= 1;\
+ AV_COPY32(mvbuf[idx], mvn);\
+ mvbuf[idx][1] <<= 1;\
+ mvn = mvbuf[idx];\
+ }\
+ }\
+ }
- tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+static av_always_inline void pred_pskip_motion(H264Context * const h){
+ DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
+ DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
+ MpegEncContext * const s = &h->s;
+ int8_t *ref = s->current_picture.ref_index[0];
+ int16_t (*mv)[2] = s->current_picture.motion_val[0];
+ int top_ref, left_ref, diagonal_ref, match_count, mx, my;
+ const int16_t *A, *B, *C;
+ int b_stride = h->b_stride;
+
+ fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+
+ /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here.
+ * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's
+ * faster this way. Is there a way to avoid this duplication?
+ */
+ if(USES_LIST(h->left_type[LTOP], 0)){
+ left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
+ A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
+ FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
+ if(!(left_ref | AV_RN32A(A))){
+ goto zeromv;
+ }
+ }else if(h->left_type[LTOP]){
+ left_ref = LIST_NOT_USED;
+ A = zeromv;
+ }else{
+ goto zeromv;
+ }
- if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
- || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ]))
- || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){
+ if(USES_LIST(h->top_type, 0)){
+ top_ref = ref[4*h->top_mb_xy + 2];
+ B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
+ FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
+ if(!(top_ref | AV_RN32A(B))){
+ goto zeromv;
+ }
+ }else if(h->top_type){
+ top_ref = LIST_NOT_USED;
+ B = zeromv;
+ }else{
+ goto zeromv;
+ }
+
+ tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
- *mx = *my = 0;
- return;
+ if(USES_LIST(h->topright_type, 0)){
+ diagonal_ref = ref[4*h->topright_mb_xy + 2];
+ C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride];
+ FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
+ }else if(h->topright_type){
+ diagonal_ref = LIST_NOT_USED;
+ C = zeromv;
+ }else{
+ if(USES_LIST(h->topleft_type, 0)){
+ diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)];
+ C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)];
+ FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
+ }else if(h->topleft_type){
+ diagonal_ref = LIST_NOT_USED;
+ C = zeromv;
+ }else{
+ diagonal_ref = PART_NOT_AVAILABLE;
+ C = zeromv;
+ }
}
- pred_motion(h, 0, 4, 0, 0, mx, my);
+ match_count= !diagonal_ref + !top_ref + !left_ref;
+ tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
+ if(match_count > 1){
+ mx = mid_pred(A[0], B[0], C[0]);
+ my = mid_pred(A[1], B[1], C[1]);
+ }else if(match_count==1){
+ if(!left_ref){
+ mx = A[0];
+ my = A[1];
+ }else if(!top_ref){
+ mx = B[0];
+ my = B[1];
+ }else{
+ mx = C[0];
+ my = C[1];
+ }
+ }else{
+ mx = mid_pred(A[0], B[0], C[0]);
+ my = mid_pred(A[1], B[1], C[1]);
+ }
+ fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+ return;
+zeromv:
+ fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
return;
}
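
The FIX_MV_MBAFF macro introduced above reconciles a frame/field mismatch between the current macroblock and a neighbour under MBAFF coding: a frame neighbour seen from a field macroblock has its reference index doubled and its vertical MV halved (field lines are half as dense), and the opposite case is inverted. A minimal sketch of that adjustment, under simplified assumptions (hypothetical function name; the real macro also copies the MV into a scratch buffer so the neighbour's cached value stays untouched):

#include <stdint.h>

/* Hypothetical illustration of the frame<->field neighbour adjustment
 * performed by FIX_MV_MBAFF. cur_is_field/neigh_is_field stand in for
 * the MB_FIELD and IS_INTERLACED(type) checks in the macro. */
static void fix_mv_mbaff_sketch(int cur_is_field, int neigh_is_field,
                                int *refn, int16_t mv[2])
{
    if (cur_is_field && !neigh_is_field) {
        *refn <<= 1;   /* one frame reference maps to two field references */
        mv[1] /= 2;    /* frame vertical units -> field vertical units */
    } else if (!cur_is_field && neigh_is_field) {
        *refn >>= 1;   /* two field references map back to one frame reference */
        mv[1] *= 2;    /* field vertical units -> frame vertical units */
    }
}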