summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/dsputil.c 75
-rw-r--r-- libavcodec/dsputil.h 5
-rw-r--r-- libavcodec/snow.c 14
-rw-r--r-- tests/ffmpeg.regression.ref 4
-rw-r--r-- tests/rotozoom.regression.ref 4
5 files changed, 90 insertions, 12 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 3587ebf8b2..c7ae725c6f 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -1487,6 +1487,17 @@ H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
+static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) /* copy h rows from src to dst, one 16-bit LD16/ST16 transfer per row */
+{
+ int i;
+ for(i=0; i<h; i++) /* one iteration per row */
+ {
+ ST16(dst , LD16(src )); /* ST16 writes a uint16_t through struct unaligned_16; LD16 is defined outside this view, presumably the matching 16-bit load */
+ dst+=dstStride; /* each pointer advances by its own stride to reach the next row */
+ src+=srcStride;
+ }
+}
+
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
@@ -2052,6 +2063,68 @@ QPEL_MC(0, avg_ , _ , op_avg)
#if 1
#define H264_LOWPASS(OPNAME, OP, OP2) \
+static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* horizontal 6-tap filter producing a 2-wide, 2-high block; reads src[-2..4] on each row */\
+ const int h=2; /* number of rows processed */\
+ uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably consumed inside OP, which is defined outside this view */\
+ int i;\
+ for(i=0; i<h; i++)\
+ {\
+ OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3])); /* taps 1,-5,20,20,-5,1 over src[-2..3]; the raw sum is handed to OP, whose scaling/clipping is not visible here */\
+ OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4])); /* same taps shifted one pixel right: src[-1..4] */\
+ dst+=dstStride; /* step both pointers to the next row */\
+ src+=srcStride;\
+ }\
+}\
+\
+static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* vertical 6-tap filter producing a 2-wide, 2-high block; reads rows -2..4 of each column */\
+ const int w=2; /* number of columns processed */\
+ uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably consumed inside OP, which is defined outside this view */\
+ int i;\
+ for(i=0; i<w; i++) /* one iteration per column */\
+ {\
+ const int srcB= src[-2*srcStride]; /* the seven vertically adjacent samples of this column, two rows above through four rows below */\
+ const int srcA= src[-1*srcStride];\
+ const int src0= src[0 *srcStride];\
+ const int src1= src[1 *srcStride];\
+ const int src2= src[2 *srcStride];\
+ const int src3= src[3 *srcStride];\
+ const int src4= src[4 *srcStride];\
+ OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3)); /* output row 0: taps 1,-5,20,20,-5,1 over rows -2..3 */\
+ OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4)); /* output row 1: same taps over rows -1..4 */\
+ dst++; /* move to the next column */\
+ src++;\
+ }\
+}\
+\
+static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){ /* two-pass 2x2 filter: horizontal 6-tap into the int16_t tmp buffer, then vertical 6-tap from tmp into dst */\
+ const int h=2;\
+ const int w=2;\
+ uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably consumed inside OP2, which is defined outside this view */\
+ int i;\
+ src -= 2*srcStride; /* start two rows above the block so the vertical pass has its upper neighbours */\
+ for(i=0; i<h+5; i++) /* pass 1: h+5 = 7 rows, covering source rows -2..4 */\
+ {\
+ tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]); /* raw horizontal filter sum stored as-is, without going through OP/OP2 */\
+ tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+ tmp+=tmpStride;\
+ src+=srcStride;\
+ }\
+ tmp -= tmpStride*(h+5-2); /* rewind 5 of the 7 rows written: tmp now points at intermediate row 2, i.e. source row 0 */\
+ for(i=0; i<w; i++) /* pass 2: one iteration per column */\
+ {\
+ const int tmpB= tmp[-2*tmpStride]; /* seven vertically adjacent intermediate values of this column */\
+ const int tmpA= tmp[-1*tmpStride];\
+ const int tmp0= tmp[0 *tmpStride];\
+ const int tmp1= tmp[1 *tmpStride];\
+ const int tmp2= tmp[2 *tmpStride];\
+ const int tmp3= tmp[3 *tmpStride];\
+ const int tmp4= tmp[4 *tmpStride];\
+ OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); /* output row 0; OP2 (not OP) receives the doubly filtered sum, its scaling is not visible here */\
+ OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4)); /* output row 1: same taps one row lower */\
+ dst++; /* move to the next column */\
+ tmp++;\
+ }\
+}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
const int h=4;\
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
@@ -2398,6 +2471,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, i
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
+H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
@@ -3879,6 +3953,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
dspfunc(put_h264_qpel, 0, 16);
dspfunc(put_h264_qpel, 1, 8);
dspfunc(put_h264_qpel, 2, 4);
+ dspfunc(put_h264_qpel, 3, 2);
dspfunc(avg_h264_qpel, 0, 16);
dspfunc(avg_h264_qpel, 1, 8);
dspfunc(avg_h264_qpel, 2, 4);
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 969fdbcb69..fdcea72129 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -253,8 +253,8 @@ typedef struct DSPContext {
h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
- qpel_mc_func put_h264_qpel_pixels_tab[3][16];
- qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
+ qpel_mc_func put_h264_qpel_pixels_tab[4][16];
+ qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
h264_weight_func weight_h264_pixels_tab[10];
h264_biweight_func biweight_h264_pixels_tab[10];
@@ -510,6 +510,7 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
#define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l)
+#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b) /* store b through the uint16_t member l of packed struct unaligned_16 at address a; same shape as ST32 */
#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
#else /* __GNUC__ */
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 0aa56dba61..20d24f4fcb 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -2500,6 +2500,7 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp,
int my= block->my*scale;
const int dx= mx&15;
const int dy= my&15;
+ const int tab_index= 3 - (b_w>>2) + (b_w>>4);
sx += (mx>>4) - 2;
sy += (my>>4) - 2;
src += sx + sy*stride;
@@ -2511,17 +2512,18 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp,
assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
assert(!(b_w&(b_w-1)));
assert(b_w>1 && b_h>1);
- if((dx&3) || (dy&3) || b_w==2 || b_h==2)
+ assert(tab_index>=0 && tab_index<4);
+ if((dx&3) || (dy&3))
mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
else if(b_w==b_h)
- s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
+ s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
else if(b_w==2*b_h){
- s->dsp.put_h264_qpel_pixels_tab[2-(b_h>>3)][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
- s->dsp.put_h264_qpel_pixels_tab[2-(b_h>>3)][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
+ s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
+ s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
}else{
assert(2*b_w==b_h);
- s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
- s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
+ s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
+ s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
}
}
}
diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref
index c56846fa65..f845e192d6 100644
--- a/tests/ffmpeg.regression.ref
+++ b/tests/ffmpeg.regression.ref
@@ -123,8 +123,8 @@ stddev: 0.00 PSNR:99.99 bytes:7602176
1197138 ./data/a-snow.avi
e7c746171b092266b0cf55bb5de2a40a *./data/out.yuv
stddev: 2.89 PSNR:38.87 bytes:7602176
-94bedf8929178a8202ae3b5dbcdb84dd *./data/a-snow53.avi
-3533696 ./data/a-snow53.avi
+11fd61ee7e67ef7a7b2a3df973691305 *./data/a-snow53.avi
+3533710 ./data/a-snow53.avi
799d3db687f6cdd7a837ec156efc171f *./data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
e1da20e3f52f4d2aa18e9486986161fc *./data/a-dv.dv
diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref
index 4eda12d065..33280ef61b 100644
--- a/tests/rotozoom.regression.ref
+++ b/tests/rotozoom.regression.ref
@@ -123,8 +123,8 @@ b926518ac399c7af0f218a7115315b4f *./data/a-snow.avi
286800 ./data/a-snow.avi
6c59db71d950610f854d05e2cef18609 *./data/out.yuv
stddev: 2.32 PSNR:40.80 bytes:7602176
-4d2bcc832e318fad3c25614e31daa6fe *./data/a-snow53.avi
-2725630 ./data/a-snow53.avi
+3f20642bb789dfb75ae3e8c03f9b425c *./data/a-snow53.avi
+2725570 ./data/a-snow53.avi
dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
a553532dcd54c1c421b52c3b6fece6ef *./data/a-dv.dv