summary | refs | log | tree | commit | diff
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: James Darnley <jdarnley@obe.tv> 2016-11-28 14:26:53 +0100
committer: James Darnley <jdarnley@obe.tv> 2016-11-30 22:58:27 +0100
commit: 815ea8c6ccf7a1a8154a829f948d7c98120ad084 (patch)
tree: acb200e78887ada8fa9cf869688c1b5ac70d3776 /libavcodec/x86
parent: 122190392b297b7cd9783641d880df887fd80d07 (diff)
avcodec/h264: mmxext 4:2:2 chroma intra deblock/loop filter
2.1 times faster (401 vs. 194 cycles)
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/h264_deblock.asm 14
-rw-r--r-- libavcodec/x86/h264dsp_init.c 2
2 files changed, 16 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 4aabbc0832..fe0ab20266 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -946,6 +946,20 @@ cglobal deblock_h_chroma_intra_8, 4,6
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
RET
+cglobal deblock_h_chroma422_intra_8, 4, 6
+ CHROMA_H_START
+ TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
+ call ff_chroma_intra_body_mmxext
+ TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
+
+ lea r0, [r0+r1*8]
+ lea t5, [t5+r1*8]
+
+ TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
+ call ff_chroma_intra_body_mmxext
+ TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
+RET
+
ALIGN 16
ff_chroma_intra_body_mmxext:
LOAD_MASK r2d, r3d
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index d2452c7131..027c1ae0b3 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -130,6 +130,7 @@ LF_FUNCS(uint8_t, 8)
LF_FUNCS(uint16_t, 10)
void ff_deblock_h_chroma422_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+LF_IFUNC(h, chroma422_intra, 8, mmxext)
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
LF_FUNC(v8, luma, 8, mmxext)
@@ -249,6 +250,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
} else {
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
+ c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_mmxext;
}
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
c->h264_v_loop_filter_luma = deblock_v_luma_8_mmxext;