From a4ce9ca28b1b7b3943b0e7f1b62d1024525e053c Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 9 Jan 2019 15:04:40 +0100 Subject: residual_calc.asm: reduce register use in the s1 variant Make it similar to the s2 version, which should make it easier to templatize the code in the future. --- residual_calc.asm | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'residual_calc.asm') diff --git a/residual_calc.asm b/residual_calc.asm index f4b11b6..638ff42 100644 --- a/residual_calc.asm +++ b/residual_calc.asm @@ -89,31 +89,29 @@ cglobal residual_calc_line_s1, 7, 13, 12, linesize, dst, stride, u, rhs, diff_co movu m2, [uq + offsetq + 8] movu m3, [uq + offsetq - 8] - mulpd m4, m8, [diff_coeffs10q + offsetq] - mulpd m5, m11, [diff_coeffs20q + offsetq] - subpd m6, m2, m3 - vfmadd231pd m0, m4, m6 ; res += d_x u * diff_coeffs10 + mulpd m6, m8 + vfmadd231pd m0, m6, [diff_coeffs10q + offsetq] ; res += d_x u * diff_coeffs10 addpd m1, m1 addpd m6, m2, m3 subpd m6, m1 - vfmadd231pd m0, m5, m6 ; res += d_xx u * diff_coeffs20 + mulpd m6, m11 + vfmadd231pd m0, m6, [diff_coeffs20q + offsetq] ; res += d_xx u * diff_coeffs20 ; dy, d2y movu m2, [u_upq + offsetq] movu m3, [u_downq + offsetq] - mulpd m4, m7, [diff_coeffs01q + offsetq] - mulpd m5, m10, [diff_coeffs02q + offsetq] - subpd m6, m2, m3 - vfmadd231pd m0, m4, m6 ; res += d_y u * diff_coeffs01 + mulpd m6, m7 + vfmadd231pd m0, m6, [diff_coeffs01q + offsetq] ; res += d_y u * diff_coeffs01 addpd m6, m2, m3 subpd m6, m1 - vfmadd231pd m0, m5, m6 ; res += d_yy u * diff_coeffs02 + mulpd m6, m10 + vfmadd231pd m0, m6, [diff_coeffs02q + offsetq] ; res += d_yy u * diff_coeffs02 ; mixed d2xy movu m1, [u_upq + offsetq + 8] -- cgit v1.2.3