aboutsummaryrefslogtreecommitdiff
path: root/transfer_interp.asm
diff options
context:
space:
mode:
author: Anton Khirnov <anton@khirnov.net>, 2019-04-08 10:42:15 +0200
committer: Anton Khirnov <anton@khirnov.net>, 2024-04-16 14:48:47 +0200
commit: 52581fdefd8257249768deb837548e9c71eac57e (patch)
tree: 61375e8a7b53bbea8b49f707e2c5fa5b09742529 /transfer_interp.asm
parent: a2df7298cd7e2dba47cb7274b8b7d983265e7279 (diff)
x86: add a misc utility header
Diffstat (limited to 'transfer_interp.asm')
-rw-r--r-- transfer_interp.asm 25
1 file changed, 13 insertions, 12 deletions
diff --git a/transfer_interp.asm b/transfer_interp.asm
index e711b8f..d02e188 100644
--- a/transfer_interp.asm
+++ b/transfer_interp.asm
@@ -18,10 +18,11 @@
%include "config.asm"
%include "x86inc.asm"
+%include "util.asm"
SECTION .text
-INIT_YMM fma3
+INIT_YMM avx2
cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\
idx_x_val
shl src_strideq, 3
@@ -35,10 +36,10 @@ cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, i
%define offsetq dst_lenq
movu m0, [fact_yq]
- vpermq m1, m0, 01010101b ; fact y + 1 -> m1
- vpermq m2, m0, 10101010b ; fact y + 2 -> m2
- vpermq m3, m0, 11111111b ; fact y + 3 -> m3
- vpermq m0, m0, 00000000b ; fact y + 0 -> m0
+ SPLATPD m1, m0, 1 ; fact y + 1 -> m1
+ SPLATPD m2, m0, 2 ; fact y + 2 -> m2
+ SPLATPD m3, m0, 3 ; fact y + 3 -> m3
+ SPLATPD m0, m0, 0 ; fact y + 0 -> m0
.loop:
mov idx_x_valq, [idx_xq + offsetq]
@@ -73,7 +74,7 @@ cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, i
RET
-INIT_YMM fma3
+INIT_YMM avx2
cglobal transfer_interp2d_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\
idx_x_val, offset6
shl src_strideq, 3
@@ -90,13 +91,13 @@ cglobal transfer_interp2d_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride,
add offset6q, offset6q
movu m0, [fact_yq]
- vpermq m1, m0, 01010101b ; fact y + 1 -> m1
- vpermq m2, m0, 10101010b ; fact y + 2 -> m2
- vpermq m3, m0, 11111111b ; fact y + 3 -> m3
- vpermq m0, m0, 00000000b ; fact y + 0 -> m0
+ SPLATPD m1, m0, 1 ; fact y + 1 -> m1
+ SPLATPD m2, m0, 2 ; fact y + 2 -> m2
+ SPLATPD m3, m0, 3 ; fact y + 3 -> m3
+ SPLATPD m0, m0, 0 ; fact y + 0 -> m0
movu xm4, [fact_yq + 8 * 4]
- vpermq m5, m4, 01010101b
- vpermq m4, m4, 0
+ SPLATPD m5, m4, 1
+ SPLATPD m4, m4, 0
.loop:
mov idx_x_valq, [idx_xq + offsetq]