summary | refs | log | tree | commit | diff
path: root/libavcodec/x86/hevc_deblock.asm
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2014-08-02 23:21:31 -0300
committer: Michael Niedermayer <michaelni@gmx.at> 2014-08-03 04:24:15 +0200
commit: d0f56ca0710157144fe00c075dd508085df716ef (patch)
tree: f17db39ee285ff5aaabe4b1cc24c6a3a95b1f3cf /libavcodec/x86/hevc_deblock.asm
parent: 2e6fdcb7f3c86491408a3699f0aa9dc52b7c5686 (diff)
x86/hevc_deblock: improve 8bit transpose store macros
Up to four fewer instructions, depending on function and instruction set.

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hevc_deblock.asm')
-rw-r--r-- libavcodec/x86/hevc_deblock.asm 70
1 files changed, 22 insertions, 48 deletions
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 5951e86844..89c0f9bb64 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -77,16 +77,10 @@ INIT_XMM sse2
; in: 4 rows of 8 words in m0..m3
; out: 8 rows of 4 bytes in %1..%8
%macro TRANSPOSE8x4B_STORE 8
- packuswb m0, m0
- packuswb m1, m1
- packuswb m2, m2
- packuswb m3, m3
-
- punpcklbw m0, m1
- punpcklbw m2, m3
-
- punpckhwd m6, m0, m2
- punpcklwd m0, m2
+ packuswb m0, m2
+ packuswb m1, m3
+ SBUTTERFLY bw, 0, 1, 2
+ SBUTTERFLY wd, 0, 1, 2
movd %1, m0
pshufd m0, m0, 0x39
@@ -96,13 +90,13 @@ INIT_XMM sse2
pshufd m0, m0, 0x39
movd %4, m0
- movd %5, m6
- pshufd m6, m6, 0x39
- movd %6, m6
- pshufd m6, m6, 0x39
- movd %7, m6
- pshufd m6, m6, 0x39
- movd %8, m6
+ movd %5, m1
+ pshufd m1, m1, 0x39
+ movd %6, m1
+ pshufd m1, m1, 0x39
+ movd %7, m1
+ pshufd m1, m1, 0x39
+ movd %8, m1
%endmacro
; in: 8 rows of 4 words in %4..%11
@@ -204,40 +198,20 @@ INIT_XMM sse2
; in: 8 rows of 8 words in m0..m7
; out: 8 rows of 8 bytes in %1..%8
%macro TRANSPOSE8x8B_STORE 8
- packuswb m0, m0
- packuswb m1, m1
- packuswb m2, m2
- packuswb m3, m3
- packuswb m4, m4
- packuswb m5, m5
- packuswb m6, m6
- packuswb m7, m7
-
- punpcklbw m0, m1
- punpcklbw m2, m3
-
- punpckhwd m8, m0, m2
- punpcklwd m0, m2
-
- punpcklbw m4, m5
- punpcklbw m6, m7
-
- punpckhwd m9, m4, m6
- punpcklwd m4, m6
+ packuswb m0, m4
+ packuswb m1, m5
+ packuswb m2, m6
+ packuswb m3, m7
+ TRANSPOSE2x4x4B 0, 1, 2, 3, 4
- punpckhdq m10, m0, m4; 2, 3
- punpckldq m0, m4; 0, 1
-
- punpckldq m11, m8, m9; 4, 5
- punpckhdq m8, m9; 6, 7
movq %1, m0
movhps %2, m0
- movq %3, m10
- movhps %4, m10
- movq %5, m11
- movhps %6, m11
- movq %7, m8
- movhps %8, m8
+ movq %3, m1
+ movhps %4, m1
+ movq %5, m2
+ movhps %6, m2
+ movq %7, m3
+ movhps %8, m3
%endmacro
; in: 8 rows of 8 words in %1..%8