summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Garrett-Glaser <darkshikari@gmail.com>2010-06-28 23:53:07 +0000
committerJason Garrett-Glaser <darkshikari@gmail.com>2010-06-28 23:53:07 +0000
commitfb9927ad7dca23d348fa4d915328e030c40f81de (patch)
tree81787356716c5922284f8f32f61b47acad9f2e94
parent8b746bb47308730ede2fe06e64ddfa66b64c6450 (diff)
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
Originally committed as revision 23875 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/x86/h264_intrapred.asm78
-rw-r--r--libavcodec/x86/h264dsp_mmx.c6
2 files changed, 84 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 085460cdfd..2bd19f9ed4 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -494,3 +494,81 @@ cglobal pred4x4_dc_mmxext, 3,5
mov [r0+r2*1], r3d
mov [r0+r2*2], r3d
RET
+
+;-----------------------------------------------------------------------------
+; void pred4x4_tm_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED4x4_TM_MMX 1
+cglobal pred4x4_tm_vp8_%1, 3,6
+ sub r0, r2
+ pxor mm7, mm7
+ movd mm0, [r0]
+ punpcklbw mm0, mm7
+ movzx r4d, byte [r0-1]
+ mov r5d, 2
+.loop:
+ movzx r1d, byte [r0+r2*1-1]
+ movzx r3d, byte [r0+r2*2-1]
+ sub r1d, r4d
+ sub r3d, r4d
+ movd mm2, r1d
+ movd mm4, r3d
+%ifidn %1, mmx
+ punpcklwd mm2, mm2
+ punpcklwd mm4, mm4
+ punpckldq mm2, mm2
+ punpckldq mm4, mm4
+%else
+ pshufw mm2, mm2, 0
+ pshufw mm4, mm4, 0
+%endif
+ paddw mm2, mm0
+ paddw mm4, mm0
+ packuswb mm2, mm2
+ packuswb mm4, mm4
+ movd [r0+r2*1], mm2
+ movd [r0+r2*2], mm4
+ lea r0, [r0+r2*2]
+ dec r5d
+ jg .loop
+ REP_RET
+%endmacro
+
+PRED4x4_TM_MMX mmx
+PRED4x4_TM_MMX mmxext
+
+cglobal pred4x4_tm_vp8_ssse3, 3,3
+ sub r0, r2
+ movq mm6, [tm_shuf]
+ pxor mm1, mm1
+ movd mm0, [r0]
+ punpcklbw mm0, mm1
+ movd mm7, [r0-4]
+ pshufb mm7, mm6
+ lea r1, [r0+r2*2]
+ movd mm2, [r0+r2*1-4]
+ movd mm3, [r0+r2*2-4]
+ movd mm4, [r1+r2*1-4]
+ movd mm5, [r1+r2*2-4]
+ pshufb mm2, mm6
+ pshufb mm3, mm6
+ pshufb mm4, mm6
+ pshufb mm5, mm6
+ psubw mm2, mm7
+ psubw mm3, mm7
+ psubw mm4, mm7
+ psubw mm5, mm7
+ paddw mm2, mm0
+ paddw mm3, mm0
+ paddw mm4, mm0
+ paddw mm5, mm0
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+ packuswb mm4, mm4
+ packuswb mm5, mm5
+ movd [r0+r2*1], mm2
+ movd [r0+r2*2], mm3
+ movd [r1+r2*1], mm4
+ movd [r1+r2*2], mm5
+ RET
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 118dd7bd7e..60f81a64be 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -2345,6 +2345,9 @@ void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride);
#if CONFIG_H264DSP
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
@@ -2358,6 +2361,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
if (codec_id == CODEC_ID_VP8) {
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx;
}
}
@@ -2370,6 +2374,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext;
}
}
@@ -2392,6 +2397,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
if (codec_id == CODEC_ID_VP8) {
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
}
}
#endif