; Origin: commit 24103bc5ae3d568ca97bdb70175b975fa680546c
;         "Refactor conformal factor evaluation."
;         Anton Khirnov, Fri, 30 Jun 2017 09:44:25 +0200
;
; Syntax: NASM (Intel operand order) on top of the macro layer pulled in by
; x86util.asm (cglobal, INIT_XMM/INIT_YMM, mova, mN registers, RET,
; FMULADD_PD). Argument-register assignment and prologue/epilogue are
; whatever cglobal/RET generate for the target ABI.

%include "x86util.asm"

SECTION .text

;------------------------------------------------------------------------------
; scalarproduct_metric(len1, len2, mat, vec1, vec2)
;
;                            len2-1 len1-1
; compute vec2^T·mat·vec1 =    ∑      ∑    mat[j * len1 + i] · vec1[i] · vec2[j]
;                             j=0    i=0
;
; In:   len1  number of elements in vec1 (= length of one row of mat)
;       len2  number of elements in vec2 (= number of rows of mat)
;       mat   len2 rows of len1 elements each, rows stored back to back
;       vec1  len1 elements
;       vec2  len2 elements
;       All elements are 8 bytes wide (lengths are scaled with shl 3) and are
;       processed with packed-double instructions.
; Out:  the sum is left in the low double of xmm0 (m0).
;
; Preconditions (not checked here):
;   * len1 and len2 are non-zero multiples of mmsize / 8 (2 for XMM, 4 for
;     YMM). Both loops are bottom-tested, so a zero length would still run one
;     iteration, and there is no tail handling for partial vectors.
;   * vec1 is read with mova, so vec1 (and with it every vector-sized step
;     through it) must satisfy mova's alignment requirement.
;     NOTE(review): in the SSE build the mat/vec2 memory operands of
;     FMULADD_PD presumably need 16-byte alignment as well - confirm against
;     the FMULADD_PD definition in x86util.asm.
;
; Registers:
;   m0          running total over all rows (one partial sum per lane)
;   m1..m4      per-row accumulators (m3/m4 only in the YMM build)
;   m5          current chunk of vec1
;   m6          scratch handed to FMULADD_PD
;   rowposq     negative byte offset into vec1, counts up to 0
;   len2q       after setup: negative byte offset into vec2, counts up to 0
;   stride3q    3 * row size in bytes
;
; FMULADD_PD dst, a, b, c, tmp is used as dst = a * b + c with tmp as scratch
; (defined in x86util.asm, not visible here).
;------------------------------------------------------------------------------
%macro SCALARPRODUCT_METRIC 0
cglobal scalarproduct_metric, 5, 7, 7, len1, len2, mat, vec1, vec2, rowpos, stride3
    ; convert element counts to byte counts
    shl len2q, 3
    shl len1q, 3

    ; point vec1/vec2 past their last element so that both loops can run a
    ; negative offset up towards zero and terminate on the sign flag
    add vec1q, len1q
    add vec2q, len2q
    neg len2q

    lea stride3q, [3 * len1q]           ; byte offset of the 4th row from matq

    xorpd m0, m0                        ; total = 0

    ; outer loop: one iteration handles mmsize / 8 consecutive rows of mat
.loop_2:
    mov rowposq, len1q
    neg rowposq                         ; rowpos = -row size in bytes

    xorpd m1, m1
    xorpd m2, m2

%if mmsize == 32
    xorpd m3, m3
    xorpd m4, m4
%endif

    ; inner loop: multiply one chunk of vec1 with the matching chunk of each
    ; of the rows handled in this outer iteration
.loop_1:
    mova m5, [vec1q + rowposq]

%if mmsize == 32
    FMULADD_PD m4, m5, [matq + stride3q],  m4, m6
    FMULADD_PD m3, m5, [matq + 2 * len1q], m3, m6
%endif

    FMULADD_PD m2, m5, [matq + 1 * len1q], m2, m6
    FMULADD_PD m1, m5, [matq + 0 * len1q], m1, m6

    add matq, mmsize
    add rowposq, mmsize
    js .loop_1                          ; continue while rowpos < 0

    ; horizontal reduction: collapse each row accumulator to a single double
    ; and pack the row sums into m1 in row order
    haddpd m1, m2

%if mmsize == 32
    ; haddpd works per 128-bit lane, so fold the upper lane into the lower:
    ; xmm1 = { row0, row1 }
    vextractf128 xmm2, ymm1, 1
    addpd xmm1, xmm2

    ; same for rows 2 and 3: xmm3 = { row2, row3 }
    haddpd m3, m4
    vextractf128 xmm4, ymm3, 1
    addpd xmm3, xmm4

    vinsertf128 ymm1, ymm1, xmm3, 1     ; m1 = { row0, row1, row2, row3 }
%endif

    ; total += rowsums * vec2[current rows]
    FMULADD_PD m0, m1, [vec2q + len2q], m0, m6

    ; matq already advanced by one row inside the inner loop; skip the
    ; remaining rows that were processed alongside it
%if mmsize == 32
    add matq, stride3q
%else
    add matq, len1q
%endif
    add len2q, mmsize
    js .loop_2                          ; continue while vec2 offset < 0

    ; sum the lanes of the running total into the low double of xmm0
    haddpd m0, m0

%if mmsize == 32
    vextractf128 xmm1, ymm0, 1
    addpd xmm0, xmm1
%endif

    ; no emms: this routine never touches MMX/x87 state

    RET
%endmacro

INIT_XMM sse3
SCALARPRODUCT_METRIC

INIT_YMM avx
SCALARPRODUCT_METRIC

INIT_YMM fma3
SCALARPRODUCT_METRIC