diff options
author | Anton Khirnov <anton@khirnov.net> | 2019-01-09 15:04:40 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2019-01-10 09:14:21 +0100 |
commit | a4ce9ca28b1b7b3943b0e7f1b62d1024525e053c (patch) | |
tree | 89b6ad31f856e8f467a4e9ddd21cfe785a8a659f /residual_calc.asm | |
parent | 2be5215745b27d2a2d7bf5cab1ff3ebe37be5bef (diff) |
residual_calc.asm: reduce register use in the s1 variant
Make it similar to the s2 version, which should make it easier to
templatize the code in the future.
Diffstat (limited to 'residual_calc.asm')
-rw-r--r-- | residual_calc.asm | 18 |
1 file changed, 8 insertions, 10 deletions
```diff
diff --git a/residual_calc.asm b/residual_calc.asm
index f4b11b6..638ff42 100644
--- a/residual_calc.asm
+++ b/residual_calc.asm
@@ -89,31 +89,29 @@ cglobal residual_calc_line_s1, 7, 13, 12, linesize, dst, stride, u, rhs, diff_co
     movu m2, [uq + offsetq + 8]
     movu m3, [uq + offsetq - 8]
 
-    mulpd m4, m8, [diff_coeffs10q + offsetq]
-    mulpd m5, m11, [diff_coeffs20q + offsetq]
-
     subpd m6, m2, m3
-    vfmadd231pd m0, m4, m6 ; res += d_x u * diff_coeffs10
+    mulpd m6, m8
+    vfmadd231pd m0, m6, [diff_coeffs10q + offsetq] ; res += d_x u * diff_coeffs10
 
     addpd m1, m1
 
     addpd m6, m2, m3
     subpd m6, m1
-    vfmadd231pd m0, m5, m6 ; res += d_xx u * diff_coeffs20
+    mulpd m6, m11
+    vfmadd231pd m0, m6, [diff_coeffs20q + offsetq] ; res += d_xx u * diff_coeffs20
 
     ; dy, d2y
     movu m2, [u_upq + offsetq]
     movu m3, [u_downq + offsetq]
 
-    mulpd m4, m7, [diff_coeffs01q + offsetq]
-    mulpd m5, m10, [diff_coeffs02q + offsetq]
-
     subpd m6, m2, m3
-    vfmadd231pd m0, m4, m6 ; res += d_y u * diff_coeffs01
+    mulpd m6, m7
+    vfmadd231pd m0, m6, [diff_coeffs01q + offsetq] ; res += d_y u * diff_coeffs01
 
     addpd m6, m2, m3
     subpd m6, m1
-    vfmadd231pd m0, m5, m6 ; res += d_yy u * diff_coeffs02
+    mulpd m6, m10
+    vfmadd231pd m0, m6, [diff_coeffs02q + offsetq] ; res += d_yy u * diff_coeffs02
 
     ; mixed d2xy
     movu m1, [u_upq + offsetq + 8]
```