From fd9251f83202c3e18c3dd7f7ce980d1c05d2d6af Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 6 Sep 2022 12:26:10 +0200 Subject: fill_eq_coeffs_template: replace repeated divisions by reciprocals Much faster. --- src/fill_eq_coeffs_template.c | 108 ++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/src/fill_eq_coeffs_template.c b/src/fill_eq_coeffs_template.c index 6e06c01..ed8c60a 100644 --- a/src/fill_eq_coeffs_template.c +++ b/src/fill_eq_coeffs_template.c @@ -194,6 +194,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv #define DXZ(var) (diff11(&vars[RHS_VAR_ ## var][idx_src], 1, stride_z, dx_inv, dz_inv)) const ELEM_TYPE x = LOAD_VAR(X); + const ELEM_TYPE xinv = ONE / x; // special treatment for the axis is only implemented in the scalar version #ifdef ELEM_SCALAR @@ -234,9 +235,9 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv { dx_gt13, ZERO, dx_gt33 }, }, { - { ZERO, AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO }, - { AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO, AXIS_VAL(dx_gt13, gtxz / x) }, - { ZERO, AXIS_VAL(dx_gt13, gtxz / x), ZERO }, + { ZERO, AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) * xinv), ZERO }, + { AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) * xinv), ZERO, AXIS_VAL(dx_gt13, gtxz * xinv) }, + { ZERO, AXIS_VAL(dx_gt13, gtxz * xinv), ZERO }, }, { { dz_gt11, ZERO, dz_gt13 }, @@ -268,10 +269,10 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv { dxx_gt13, ZERO, dxx_gt33 }, }, { - { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO }, - { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO, - AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) }, - { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO }, + { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO }, + { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO, + AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) }, + { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO }, }, { { dxz_gt11, ZERO, dxz_gt13 }, @@ -282,23 +283,23 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv }, { { - { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO }, - { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO, - AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) }, - { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO }, + { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO }, + { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO, + AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) }, + { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO }, }, { - { AXIS_VAL(dxx_gt22, dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x)), ZERO, - AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) }, - { ZERO, AXIS_VAL(dxx_gt11, dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x)), ZERO }, - { AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO, - AXIS_VAL(dxx_gt33, dx_gt33 / x) }, + { AXIS_VAL(dxx_gt22, dx_gt11 * xinv - TWO * (gtxx - gtyy) * SQR(xinv)), ZERO, + AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) }, + { ZERO, AXIS_VAL(dxx_gt11, dx_gt22 * xinv + TWO * (gtxx - gtyy) * SQR(xinv)), ZERO }, + { AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO, + AXIS_VAL(dxx_gt33, dx_gt33 * xinv) }, }, { - { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO }, - { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO, - AXIS_VAL(dxz_gt13, dz_gt13 / x) }, - { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO }, + { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO }, + { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO, + AXIS_VAL(dxz_gt13, dz_gt13 * xinv) }, + { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv), ZERO }, }, }, @@ -309,10 +310,10 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv { dxz_gt13, ZERO, dxz_gt33 }, }, { - { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO }, - { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO, - AXIS_VAL(dxz_gt13, dz_gt13 / x) }, - { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO }, + { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO }, + { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO, + AXIS_VAL(dxz_gt13, dz_gt13 * xinv) }, + { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv), ZERO }, }, { { dzz_gt11, ZERO, dzz_gt13 }, @@ -352,9 +353,9 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv { dx_At13, ZERO, dx_At33 }, }, { - { ZERO, AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO }, - { AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO, AXIS_VAL(dx_At13, Atxz / x) }, - { ZERO, AXIS_VAL(dx_At13, Atxz / x), ZERO }, + { ZERO, AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) * xinv), ZERO }, + { AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) * xinv), ZERO, AXIS_VAL(dx_At13, Atxz * xinv) }, + { ZERO, AXIS_VAL(dx_At13, Atxz * xinv), ZERO }, }, { { dz_At11, ZERO, dz_At13 }, @@ -364,6 +365,10 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv }; const ELEM_TYPE phi = LOAD_VAR(PHI); + const ELEM_TYPE phi_inv = ONE / phi; + const ELEM_TYPE phi2_inv = SQR(phi_inv); + const ELEM_TYPE phi3_inv = phi2_inv * phi_inv; + const ELEM_TYPE dphi[3] = { DX(PHI), ZERO, DZ(PHI) }; const ELEM_TYPE dxx_phi = DXX(PHI); @@ -372,7 +377,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv const ELEM_TYPE d2phi[3][3] = { { dxx_phi, ZERO, dxz_phi }, - { ZERO, AXIS_VAL(dxx_phi, dphi[0] / x), ZERO }, + { ZERO, AXIS_VAL(dxx_phi, dphi[0] * xinv), ZERO }, { dxz_phi, ZERO, dzz_phi }, }; @@ -408,7 +413,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv const ELEM_TYPE dz_betaz = DZ(BETAZ); const ELEM_TYPE dbeta[3][3] = {{ dx_betax, ZERO, dx_betaz }, - { ZERO, AXIS_VAL(dx_betax, betax / x), ZERO }, + { ZERO, AXIS_VAL(dx_betax, betax * xinv), ZERO }, { dz_betax, ZERO, dz_betaz }}; const ELEM_TYPE dxx_betax = DXX(BETAX); @@ -429,30 +434,31 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv const ELEM_TYPE d2beta[3][3][3] = { { { dxx_betax, ZERO, dxx_betaz }, - { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO }, + { ZERO, AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO }, { dxz_betax, ZERO, dxz_betaz }, }, { - { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO }, - { AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO, AXIS_VAL(dxx_betaz, dx_betaz / x) }, - { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO }, + { ZERO, AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO }, + { AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO, AXIS_VAL(dxx_betaz, dx_betaz * xinv) }, + { ZERO, AXIS_VAL(dxz_betax, dz_betax * xinv), ZERO }, }, { { dxz_betax, ZERO, dxz_betaz }, - { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO }, + { ZERO, AXIS_VAL(dxz_betax, dz_betax * xinv), ZERO }, { dzz_betax, ZERO, dzz_betaz }, }, }; - const ELEM_TYPE det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); + const ELEM_TYPE det = gtxx * gtyy * gtzz + TWO * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); + const ELEM_TYPE det_inv = ONE / det; // \tilde{γ}^{ij} - gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det; - gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det; - gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det; - gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det; - gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det; - gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det; + gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) * det_inv; + gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) * det_inv; + gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) * det_inv; + gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) * det_inv; + gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) * det_inv; + gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) * det_inv; gtu[1][0] = gtu[0][1]; gtu[2][0] = gtu[0][2]; gtu[2][1] = gtu[1][2]; @@ -461,7 +467,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { gu[j][k] = SQR(phi) * gtu[j][k]; - g[j][k] = gt[j][k] / SQR(phi); + g[j][k] = gt[j][k] * SQR(phi_inv); } // β_j @@ -477,8 +483,8 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { - dg[j][k][l] = -TWO * dphi[j] * gt[k][l] / (phi * SQR(phi)) + dgt[j][k][l] / SQR(phi); - dK[j][k][l] = -TWO * dphi[j] * At[k][l] / (phi * SQR(phi)) + dAt[j][k][l] / SQR(phi) + + dg[j][k][l] = -TWO * dphi[j] * gt[k][l] * phi3_inv + dgt[j][k][l] * phi2_inv; + dK[j][k][l] = -TWO * dphi[j] * At[k][l] * phi3_inv + dAt[j][k][l] * phi2_inv + THIRD * (dtrK[j] * g[k][l] + trK * dg[j][k][l]); } @@ -487,11 +493,11 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) for (int m = 0; m < 3; m++) { - d2g[j][k][l][m] = SIZ * gt [l][m] * dphi[j] * dphi[k] / SQR(SQR(phi)) - - TWO * gt [l][m] * d2phi[j][k] / (phi * SQR(phi)) - - TWO * dgt [j][l][m] * dphi[k] / (phi * SQR(phi)) - - TWO * dgt [k][l][m] * dphi[j] / (phi * SQR(phi)) + - d2gt[j][k][l][m] / SQR(phi); + d2g[j][k][l][m] = SIX * gt [l][m] * dphi[j] * dphi[k] * SQR(phi2_inv) - + TWO * gt [l][m] * d2phi[j][k] * phi3_inv - + TWO * dgt [j][l][m] * dphi[k] * phi3_inv - + TWO * dgt [k][l][m] * dphi[j] * phi3_inv + + d2gt[j][k][l][m] * phi2_inv; } // ∂_j γ^{kl} @@ -560,7 +566,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv // K_{ij} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) - K[j][k] = At[j][k] / SQR(phi) + THIRD * g[j][k] * trK; + K[j][k] = At[j][k] * phi2_inv + THIRD * g[j][k] * trK; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { @@ -718,7 +724,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data, idx_dc, gu[0][0] + AXIS_VAL(gu[1][1], ZERO)); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data, idx_dc, gu[2][2]); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data, idx_dc, TWO * gu[0][2]); - STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data, idx_dc, -X[0] + AXIS_VAL(ZERO, gu[1][1] / x)); + STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data, idx_dc, -X[0] + AXIS_VAL(ZERO, gu[1][1] * xinv)); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data, idx_dc, -X[2]); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data, idx_dc, -k2); -- cgit v1.2.3