about | summary | refs | log | tree | commit | diff
path: root/residual_calc.asm
diff options
context:
space:
mode:
author: Anton Khirnov <anton@khirnov.net> 2019-01-09 15:04:40 +0100
committer: Anton Khirnov <anton@khirnov.net> 2019-01-10 09:14:21 +0100
commit: a4ce9ca28b1b7b3943b0e7f1b62d1024525e053c (patch)
tree: 89b6ad31f856e8f467a4e9ddd21cfe785a8a659f /residual_calc.asm
parent: 2be5215745b27d2a2d7bf5cab1ff3ebe37be5bef (diff)
residual_calc.asm: reduce register use in the s1 variant
Make it similar to the s2 version, which should make it easier to templatize the code in the future.
Diffstat (limited to 'residual_calc.asm')
-rw-r--r-- residual_calc.asm 18
1 file changed, 8 insertions, 10 deletions
diff --git a/residual_calc.asm b/residual_calc.asm
index f4b11b6..638ff42 100644
--- a/residual_calc.asm
+++ b/residual_calc.asm
@@ -89,31 +89,29 @@ cglobal residual_calc_line_s1, 7, 13, 12, linesize, dst, stride, u, rhs, diff_co
movu m2, [uq + offsetq + 8]
movu m3, [uq + offsetq - 8]
- mulpd m4, m8, [diff_coeffs10q + offsetq]
- mulpd m5, m11, [diff_coeffs20q + offsetq]
-
subpd m6, m2, m3
- vfmadd231pd m0, m4, m6 ; res += d_x u * diff_coeffs10
+ mulpd m6, m8
+ vfmadd231pd m0, m6, [diff_coeffs10q + offsetq] ; res += d_x u * diff_coeffs10
addpd m1, m1
addpd m6, m2, m3
subpd m6, m1
- vfmadd231pd m0, m5, m6 ; res += d_xx u * diff_coeffs20
+ mulpd m6, m11
+ vfmadd231pd m0, m6, [diff_coeffs20q + offsetq] ; res += d_xx u * diff_coeffs20
; dy, d2y
movu m2, [u_upq + offsetq]
movu m3, [u_downq + offsetq]
- mulpd m4, m7, [diff_coeffs01q + offsetq]
- mulpd m5, m10, [diff_coeffs02q + offsetq]
-
subpd m6, m2, m3
- vfmadd231pd m0, m4, m6 ; res += d_y u * diff_coeffs01
+ mulpd m6, m7
+ vfmadd231pd m0, m6, [diff_coeffs01q + offsetq] ; res += d_y u * diff_coeffs01
addpd m6, m2, m3
subpd m6, m1
- vfmadd231pd m0, m5, m6 ; res += d_yy u * diff_coeffs02
+ mulpd m6, m10
+ vfmadd231pd m0, m6, [diff_coeffs02q + offsetq] ; res += d_yy u * diff_coeffs02
; mixed d2xy
movu m1, [u_upq + offsetq + 8]