From 52581fdefd8257249768deb837548e9c71eac57e Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 8 Apr 2019 10:42:15 +0200 Subject: x86: add a misc utility header --- transfer_interp.asm | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'transfer_interp.asm') diff --git a/transfer_interp.asm b/transfer_interp.asm index e711b8f..d02e188 100644 --- a/transfer_interp.asm +++ b/transfer_interp.asm @@ -18,10 +18,11 @@ %include "config.asm" %include "x86inc.asm" +%include "util.asm" SECTION .text -INIT_YMM fma3 +INIT_YMM avx2 cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\ idx_x_val shl src_strideq, 3 @@ -35,10 +36,10 @@ cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, i %define offsetq dst_lenq movu m0, [fact_yq] - vpermq m1, m0, 01010101b ; fact y + 1 -> m1 - vpermq m2, m0, 10101010b ; fact y + 2 -> m2 - vpermq m3, m0, 11111111b ; fact y + 3 -> m3 - vpermq m0, m0, 00000000b ; fact y + 0 -> m0 + SPLATPD m1, m0, 1 ; fact y + 1 -> m1 + SPLATPD m2, m0, 2 ; fact y + 2 -> m2 + SPLATPD m3, m0, 3 ; fact y + 3 -> m3 + SPLATPD m0, m0, 0 ; fact y + 0 -> m0 .loop: mov idx_x_valq, [idx_xq + offsetq] @@ -73,7 +74,7 @@ cglobal transfer_interp2d_line_cont_4, 7, 8, 6, dst, dst_len, src, src_stride, i RET -INIT_YMM fma3 +INIT_YMM avx2 cglobal transfer_interp2d_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride, idx_x, fact_x, fact_y,\ idx_x_val, offset6 shl src_strideq, 3 @@ -90,13 +91,13 @@ cglobal transfer_interp2d_line_cont_6, 7, 9, 11, dst, dst_len, src, src_stride, add offset6q, offset6q movu m0, [fact_yq] - vpermq m1, m0, 01010101b ; fact y + 1 -> m1 - vpermq m2, m0, 10101010b ; fact y + 2 -> m2 - vpermq m3, m0, 11111111b ; fact y + 3 -> m3 - vpermq m0, m0, 00000000b ; fact y + 0 -> m0 + SPLATPD m1, m0, 1 ; fact y + 1 -> m1 + SPLATPD m2, m0, 2 ; fact y + 2 -> m2 + SPLATPD m3, m0, 3 ; fact y + 3 -> m3 + SPLATPD m0, m0, 0 ; fact y + 0 -> m0 movu xm4, [fact_yq + 8 * 4] - vpermq m5, m4, 01010101b - vpermq m4, m4, 0 + SPLATPD m5, m4, 1 + SPLATPD m4, m4, 0 .loop: mov idx_x_valq, [idx_xq + offsetq] -- cgit v1.2.3