summaryrefslogtreecommitdiff
path: root/libavutil/x86
diff options
context:
space:
mode:
author James Darnley <jdarnley@obe.tv> 2017-02-10 23:17:57 +0100
committer James Darnley <jdarnley@obe.tv> 2017-02-18 20:26:52 +0100
commit 533688786799b22d0711eedcfe8f84deea014f30 (patch)
tree d4a4655fd9916238a27b5054bd8d92db8e6482bd /libavutil/x86
parent 7627df15d411a69f236b4650e88b1ab911f38efc (diff)
avcodec/h264: sse2, avx h luma mbaff deblock/loop filter
x86-64 only

Yorkfield:
 - sse2: ~2.17x (434 vs. 200 cycles)
Nehalem:
 - sse2: ~2.94x (409 vs. 139 cycles)
Skylake:
 - sse2: ~3.10x (370 vs. 119 cycles)
 - avx: ~3.29x (370 vs. 112 cycles)
Diffstat (limited to 'libavutil/x86')
-rw-r--r-- libavutil/x86/x86util.asm 15
1 files changed, 15 insertions, 0 deletions
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 1408f0a176..c50ddc6c54 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -265,6 +265,21 @@
SWAP %12, %15
%endmacro
+%macro TRANSPOSE_8X8B 8 ; takes 8 register indices, used as m%1..m%8; presumably an 8x8 byte transpose, going by the name
+ %if mmsize == 8
+ %error "This macro does not support mmsize == 8"
+ %endif
+ punpcklbw m%1, m%2 ; interleave the low-order bytes of each register pair; results land in m%1, m%3, m%5, m%7
+ punpcklbw m%3, m%4
+ punpcklbw m%5, m%6
+ punpcklbw m%7, m%8
+ TRANSPOSE4x4W %1, %3, %5, %7, %2 ; word-level step on the four interleaved registers; %2 is presumably a scratch register here -- confirm against TRANSPOSE4x4W
+ MOVHL m%2, m%1 ; m%2, m%4, m%6, m%8 are overwritten below; presumably each receives the high half of its partner -- MOVHL is defined elsewhere, confirm
+ MOVHL m%4, m%3
+ MOVHL m%6, m%5
+ MOVHL m%8, m%7
+%endmacro
+
; PABSW macro assumes %1 != %2, while ABS1/2 macros work in-place
%macro PABSW 2
%if cpuflag(ssse3)