summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2008-07-19 21:53:54 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2008-07-19 21:53:54 +0000
commit: 4f20b45fbeea62fa149c8bc471d75e42a1f7469c (patch)
tree: f978ecc271252598e1cd7b6911278e80efd43c7f
parent: d11f9e1b15e3aac6862bdfe2e5cb212b706e2c2f (diff)
Fix h264_loop_filter_strength_mmx2() so it works with PAFF.
Fixed at least: CVFI1_Sony_D.jsv, CVFI1_SVA_C.264, MR6_BT_B.h264.
Originally committed as revision 14310 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/dsputil.h 2
-rw-r--r-- libavcodec/h264.c 2
-rw-r--r-- libavcodec/i386/h264dsp_mmx.c 12
3 files changed, 13 insertions, 3 deletions
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 7a47b87b44..859993ec83 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -347,7 +347,7 @@ typedef struct DSPContext {
void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
// h264_loop_filter_strength: simd only. the C version is inlined in h264.c
void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
- int bidir, int edges, int step, int mask_mv0, int mask_mv1);
+ int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 4a02ae6795..2b3b5b28a2 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -6495,7 +6495,7 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
int step = IS_8x8DCT(mb_type) ? 2 : 1;
edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
- (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
+ (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
}
if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
bSv[0][0] = 0x0004000400040004ULL;
diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c
index 7207fd505c..f94f7088cf 100644
--- a/libavcodec/i386/h264dsp_mmx.c
+++ b/libavcodec/i386/h264dsp_mmx.c
@@ -20,6 +20,9 @@
#include "dsputil_mmx.h"
+DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
+DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
+
/***********************************/
/* IDCT */
@@ -623,7 +626,7 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
}
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
- int bidir, int edges, int step, int mask_mv0, int mask_mv1 ) {
+ int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
int dir;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
@@ -632,6 +635,13 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
"movq %2, %%mm4 \n\t"
::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7)
);
+ if(field)
+ asm volatile(
+ "movq %0, %%mm5 \n\t"
+ "movq %1, %%mm4 \n\t"
+ ::"m"(ff_pb_3_1), "m"(ff_pb_7_3)
+ );
+
// could do a special case for dir==0 && edges==1, but it only reduces the
// average filter time by 1.2%
for( dir=1; dir>=0; dir-- ) {