diff options
Diffstat (limited to 'residual_calc.asm')
-rw-r--r-- | residual_calc.asm | 21 |
1 file changed, 4 insertions, 17 deletions
diff --git a/residual_calc.asm b/residual_calc.asm index c51ba5e..3a5b800 100644 --- a/residual_calc.asm +++ b/residual_calc.asm @@ -39,7 +39,6 @@ SECTION .text ; mm register allocation (both s1 and s2) ; m0: accumulator for the residual -; m1-m5: splatted constant finite difference coefficients ; m6-m11: working registers ; m12: max(fabs(residual)) ; m13: mask for computing absolute values @@ -90,7 +89,6 @@ SECTION .text subpd m11, m8 ; m11 -= u[x+2] addpd m11, m10 ; m11 += u[x-2] %endif - mulpd m11, m2 vfmadd231pd m0, m11, [coeffs1q + offsetq] ; res += d_x u * diff_coeffs10 ; second derivative @@ -102,7 +100,6 @@ SECTION .text subpd m11, m10 ; m11 -= u[x-2] %endif subpd m11, m6 ; m11 -= fd0 u[x] - mulpd m11, m5 vfmadd231pd m0, m11, [coeffs2q + offsetq] ; res += d_xx u * diff_coeffs20 %endmacro @@ -139,7 +136,6 @@ SECTION .text vfmadd123pd m6, m14, m7 ; m6 = 8 m6 + m7 %endif - mulpd m6, m3 vfmadd231pd m0, m6, [diff_coeffs11q + offsetq] ; res += d_xy u * diff_coeffs11 %endmacro @@ -147,15 +143,6 @@ SECTION .text %macro RESIDUAL_CALC 1 %define stencil %1 - ; load and splat the finite difference factors - movu m0, [fd_factorsq + OFF_DIFF_COEFF_01] - vpermq m1, m0, 00000000b ; diff factor 01 -> m1 - vpermq m2, m0, 01010101b ; diff factor 10 -> m2 - vpermq m3, m0, 10101010b ; diff factor 11 -> m3 - vpermq m4, m0, 11111111b ; diff factor 02 -> m4 - movq xm0, [fd_factorsq + OFF_DIFF_COEFF_20] - vpermq m5, m0, 00000000b ; diff factor 20 -> m5 - %define u_downq fd_factorsq ; reuse the fd_factors register after it is no longer needed ; compute the mask for absolute value pcmpeqq m13, m13 @@ -266,11 +253,11 @@ SECTION .text %endmacro INIT_YMM fma3 -cglobal residual_calc_line_s1, 8, 14, 14, linesize, dst, res_max, stride, u, rhs, diff_coeffs, fd_factors,\ - diff_coeffs00, diff_coeffs01, diff_coeffs10, diff_coeffs11, diff_coeffs02, u_up +cglobal residual_calc_line_s1, 7, 14, 14, linesize, dst, res_max, stride, u, rhs, diff_coeffs,\ + diff_coeffs00, diff_coeffs01, 
diff_coeffs10, diff_coeffs11, diff_coeffs02, u_down, u_up RESIDUAL_CALC 1 INIT_YMM fma3 -cglobal residual_calc_line_s2, 8, 15, 16, linesize, dst, res_max, stride, u, rhs, diff_coeffs, fd_factors,\ - diff_coeffs00, diff_coeffs01, diff_coeffs10, diff_coeffs11, diff_coeffs02, u_up, u_up2 +cglobal residual_calc_line_s2, 7, 15, 16, linesize, dst, res_max, stride, u, rhs, diff_coeffs,\ + diff_coeffs00, diff_coeffs01, diff_coeffs10, diff_coeffs11, diff_coeffs02, u_down, u_up, u_up2 RESIDUAL_CALC 2 |