summary | refs | log | tree | commit | diff
path: root/residual_calc.asm
diff options
context:
space:
mode:
Diffstat (limited to 'residual_calc.asm')
-rw-r--r-- residual_calc.asm 21
1 files changed, 4 insertions, 17 deletions
diff --git a/residual_calc.asm b/residual_calc.asm
index c51ba5e..3a5b800 100644
--- a/residual_calc.asm
+++ b/residual_calc.asm
@@ -39,7 +39,6 @@ SECTION .text
; mm register allocation (both s1 and s2)
; m0: accumulator for the residual
-; m1-m5: splatted constant finite difference coefficients
; m6-m11: working registers
; m12: max(fabs(residual))
; m13: mask for computing absolute values
@@ -90,7 +89,6 @@ SECTION .text
subpd m11, m8 ; m11 -= u[x+2]
addpd m11, m10 ; m11 += u[x-2]
%endif
- mulpd m11, m2
vfmadd231pd m0, m11, [coeffs1q + offsetq] ; res += d_x u * diff_coeffs10
; second derivative
@@ -102,7 +100,6 @@ SECTION .text
subpd m11, m10 ; m11 -= u[x-2]
%endif
subpd m11, m6 ; m11 -= fd0 u[x]
- mulpd m11, m5
vfmadd231pd m0, m11, [coeffs2q + offsetq] ; res += d_xx u * diff_coeffs20
%endmacro
@@ -139,7 +136,6 @@ SECTION .text
vfmadd123pd m6, m14, m7 ; m6 = 8 m6 + m7
%endif
- mulpd m6, m3
vfmadd231pd m0, m6, [diff_coeffs11q + offsetq] ; res += d_xy u * diff_coeffs11
%endmacro
@@ -147,15 +143,6 @@ SECTION .text
%macro RESIDUAL_CALC 1
%define stencil %1
- ; load and splat the finite difference factors
- movu m0, [fd_factorsq + OFF_DIFF_COEFF_01]
- vpermq m1, m0, 00000000b ; diff factor 01 -> m1
- vpermq m2, m0, 01010101b ; diff factor 10 -> m2
- vpermq m3, m0, 10101010b ; diff factor 11 -> m3
- vpermq m4, m0, 11111111b ; diff factor 02 -> m4
- movq xm0, [fd_factorsq + OFF_DIFF_COEFF_20]
- vpermq m5, m0, 00000000b ; diff factor 20 -> m5
- %define u_downq fd_factorsq ; reuse the fd_factors register after it is no longer needed
; compute the mask for absolute value
pcmpeqq m13, m13
@@ -266,11 +253,11 @@ SECTION .text
%endmacro
INIT_YMM fma3
-cglobal residual_calc_line_s1, 8, 14, 14, linesize, dst, res_max, stride, u, rhs, diff_coeffs, fd_factors,\
- diff_coeffs00, diff_coeffs01, diff_coeffs10, diff_coeffs11, diff_coeffs02, u_up
+cglobal residual_calc_line_s1, 7, 14, 14, linesize, dst, res_max, stride, u, rhs, diff_coeffs,\
+ diff_coeffs00, diff_coeffs01, diff_coeffs10, diff_coeffs11, diff_coeffs02, u_down, u_up
RESIDUAL_CALC 1
INIT_YMM fma3
-cglobal residual_calc_line_s2, 8, 15, 16, linesize, dst, res_max, stride, u, rhs, diff_coeffs, fd_factors,\
- diff_coeffs00, diff_coeffs01, diff_coeffs10, diff_coeffs11, diff_coeffs02, u_up, u_up2
+cglobal residual_calc_line_s2, 7, 15, 16, linesize, dst, res_max, stride, u, rhs, diff_coeffs,\
+ diff_coeffs00, diff_coeffs01, diff_coeffs10, diff_coeffs11, diff_coeffs02, u_down, u_up, u_up2
RESIDUAL_CALC 2