summaryrefslogtreecommitdiff
path: root/libavcodec/i386
diff options
context:
space:
mode:
authorLoren Merritt <lorenm@u.washington.edu>2006-10-01 21:25:17 +0000
committerLoren Merritt <lorenm@u.washington.edu>2006-10-01 21:25:17 +0000
commit2833fc46462abd841e3d69242be9919c98ad3696 (patch)
tree4e9513d1913ff238c35e066877c8795b40d561c8 /libavcodec/i386
parente0769997cb753d8a3c9039b1de1542987cfc0692 (diff)
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
Originally committed as revision 6412 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386')
-rw-r--r--libavcodec/i386/dsputil_mmx.c57
-rw-r--r--libavcodec/i386/dsputil_mmx_avg.h48
2 files changed, 105 insertions, 0 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 09a7e1e1b8..6ec808e9bd 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2400,6 +2400,53 @@ QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)
QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
+/***********************************/
+/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
+
+#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE);\
+}
+#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src+S0, stride, SIZE, S1, S2);\
+}
+
+#define QPEL_2TAP(OPNAME, SIZE, MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx)\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX =\
+ OPNAME ## qpel ## SIZE ## _mc00_ ## MMX;\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX =\
+ OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX;\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX =\
+ OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX;\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src+1, stride, SIZE);\
+}\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src+stride, stride, SIZE);\
+}\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0, 1, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1, -1, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0, stride, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride, -stride, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0, stride, 1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride+1, -stride, -1)\
+
+QPEL_2TAP(put_, 16, mmx2)
+QPEL_2TAP(avg_, 16, mmx2)
+QPEL_2TAP(put_, 8, mmx2)
+QPEL_2TAP(avg_, 8, mmx2)
+QPEL_2TAP(put_, 16, 3dnow)
+QPEL_2TAP(avg_, 16, 3dnow)
+QPEL_2TAP(put_, 8, 3dnow)
+QPEL_2TAP(avg_, 8, 3dnow)
+
+
#if 0
static void just_return() { return; }
#endif
@@ -3276,6 +3323,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_h264_qpel, 0, 16);
dspfunc(avg_h264_qpel, 1, 8);
dspfunc(avg_h264_qpel, 2, 4);
+
+ dspfunc(put_2tap_qpel, 0, 16);
+ dspfunc(put_2tap_qpel, 1, 8);
+ dspfunc(avg_2tap_qpel, 0, 16);
+ dspfunc(avg_2tap_qpel, 1, 8);
#undef dspfunc
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2;
@@ -3399,6 +3451,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_h264_qpel, 1, 8);
dspfunc(avg_h264_qpel, 2, 4);
+ dspfunc(put_2tap_qpel, 0, 16);
+ dspfunc(put_2tap_qpel, 1, 8);
+ dspfunc(avg_2tap_qpel, 0, 16);
+ dspfunc(avg_2tap_qpel, 1, 8);
+
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
}
diff --git a/libavcodec/i386/dsputil_mmx_avg.h b/libavcodec/i386/dsputil_mmx_avg.h
index 8692fb0487..6dedc57647 100644
--- a/libavcodec/i386/dsputil_mmx_avg.h
+++ b/libavcodec/i386/dsputil_mmx_avg.h
@@ -818,3 +818,51 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin
DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
}
+#define QPEL_2TAP_L3(OPNAME) \
+static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+ asm volatile(\
+ "1: \n\t"\
+ "movq (%1,%2), %%mm0 \n\t"\
+ "movq 8(%1,%2), %%mm1 \n\t"\
+ PAVGB" (%1,%3), %%mm0 \n\t"\
+ PAVGB" 8(%1,%3), %%mm1 \n\t"\
+ PAVGB" (%1), %%mm0 \n\t"\
+ PAVGB" 8(%1), %%mm1 \n\t"\
+ STORE_OP( (%1,%4),%%mm0)\
+ STORE_OP(8(%1,%4),%%mm1)\
+ "movq %%mm0, (%1,%4) \n\t"\
+ "movq %%mm1, 8(%1,%4) \n\t"\
+ "add %5, %1 \n\t"\
+ "decl %0 \n\t"\
+ "jnz 1b \n\t"\
+ :"+g"(h), "+r"(src)\
+ :"r"((long)off1), "r"((long)off2),\
+ "r"((long)(dst-src)), "r"((long)stride)\
+ :"memory"\
+ );\
+}\
+static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+ asm volatile(\
+ "1: \n\t"\
+ "movq (%1,%2), %%mm0 \n\t"\
+ PAVGB" (%1,%3), %%mm0 \n\t"\
+ PAVGB" (%1), %%mm0 \n\t"\
+ STORE_OP((%1,%4),%%mm0)\
+ "movq %%mm0, (%1,%4) \n\t"\
+ "add %5, %1 \n\t"\
+ "decl %0 \n\t"\
+ "jnz 1b \n\t"\
+ :"+g"(h), "+r"(src)\
+ :"r"((long)off1), "r"((long)off2),\
+ "r"((long)(dst-src)), "r"((long)stride)\
+ :"memory"\
+ );\
+}
+
+#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
+QPEL_2TAP_L3(avg_)
+#undef STORE_OP
+#define STORE_OP(a,b)
+QPEL_2TAP_L3(put_)
+#undef STORE_OP
+#undef QPEL_2TAP_L3