summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2022-09-06 12:26:10 +0200
committerAnton Khirnov <anton@khirnov.net>2022-09-06 12:26:37 +0200
commitfd9251f83202c3e18c3dd7f7ce980d1c05d2d6af (patch)
tree40e2a071e97ba4976c08847d548d85c47ca0e754
parentc1532eebeab91159eb0ee18552da6a31ab3248ef (diff)
fill_eq_coeffs_template: replace repeated divisions by reciprocals
Much faster.
-rw-r--r--src/fill_eq_coeffs_template.c108
1 files changed, 57 insertions, 51 deletions
diff --git a/src/fill_eq_coeffs_template.c b/src/fill_eq_coeffs_template.c
index 6e06c01..ed8c60a 100644
--- a/src/fill_eq_coeffs_template.c
+++ b/src/fill_eq_coeffs_template.c
@@ -194,6 +194,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
#define DXZ(var) (diff11(&vars[RHS_VAR_ ## var][idx_src], 1, stride_z, dx_inv, dz_inv))
const ELEM_TYPE x = LOAD_VAR(X);
+ const ELEM_TYPE xinv = ONE / x;
// special treatment for the axis is only implemented in the scalar version
#ifdef ELEM_SCALAR
@@ -234,9 +235,9 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
{ dx_gt13, ZERO, dx_gt33 },
},
{
- { ZERO, AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO },
- { AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO, AXIS_VAL(dx_gt13, gtxz / x) },
- { ZERO, AXIS_VAL(dx_gt13, gtxz / x), ZERO },
+ { ZERO, AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) * xinv), ZERO },
+ { AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) * xinv), ZERO, AXIS_VAL(dx_gt13, gtxz * xinv) },
+ { ZERO, AXIS_VAL(dx_gt13, gtxz * xinv), ZERO },
},
{
{ dz_gt11, ZERO, dz_gt13 },
@@ -268,10 +269,10 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
{ dxx_gt13, ZERO, dxx_gt33 },
},
{
- { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO },
- { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO,
- AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) },
- { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO },
+ { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO },
+ { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO,
+ AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) },
+ { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO },
},
{
{ dxz_gt11, ZERO, dxz_gt13 },
@@ -282,23 +283,23 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
},
{
{
- { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO },
- { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO,
- AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) },
- { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO },
+ { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO },
+ { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO,
+ AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) },
+ { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO },
},
{
- { AXIS_VAL(dxx_gt22, dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x)), ZERO,
- AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) },
- { ZERO, AXIS_VAL(dxx_gt11, dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x)), ZERO },
- { AXIS_VAL(HALF * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO,
- AXIS_VAL(dxx_gt33, dx_gt33 / x) },
+ { AXIS_VAL(dxx_gt22, dx_gt11 * xinv - TWO * (gtxx - gtyy) * SQR(xinv)), ZERO,
+ AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) },
+ { ZERO, AXIS_VAL(dxx_gt11, dx_gt22 * xinv + TWO * (gtxx - gtyy) * SQR(xinv)), ZERO },
+ { AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO,
+ AXIS_VAL(dxx_gt33, dx_gt33 * xinv) },
},
{
- { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO },
- { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO,
- AXIS_VAL(dxz_gt13, dz_gt13 / x) },
- { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO },
+ { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO },
+ { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO,
+ AXIS_VAL(dxz_gt13, dz_gt13 * xinv) },
+ { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv), ZERO },
},
},
@@ -309,10 +310,10 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
{ dxz_gt13, ZERO, dxz_gt33 },
},
{
- { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO },
- { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO,
- AXIS_VAL(dxz_gt13, dz_gt13 / x) },
- { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO },
+ { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO },
+ { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO,
+ AXIS_VAL(dxz_gt13, dz_gt13 * xinv) },
+ { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv), ZERO },
},
{
{ dzz_gt11, ZERO, dzz_gt13 },
@@ -352,9 +353,9 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
{ dx_At13, ZERO, dx_At33 },
},
{
- { ZERO, AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO },
- { AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO, AXIS_VAL(dx_At13, Atxz / x) },
- { ZERO, AXIS_VAL(dx_At13, Atxz / x), ZERO },
+ { ZERO, AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) * xinv), ZERO },
+ { AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) * xinv), ZERO, AXIS_VAL(dx_At13, Atxz * xinv) },
+ { ZERO, AXIS_VAL(dx_At13, Atxz * xinv), ZERO },
},
{
{ dz_At11, ZERO, dz_At13 },
@@ -364,6 +365,10 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
};
const ELEM_TYPE phi = LOAD_VAR(PHI);
+ const ELEM_TYPE phi_inv = ONE / phi;
+ const ELEM_TYPE phi2_inv = SQR(phi_inv);
+ const ELEM_TYPE phi3_inv = phi2_inv * phi_inv;
+
const ELEM_TYPE dphi[3] = { DX(PHI), ZERO, DZ(PHI) };
const ELEM_TYPE dxx_phi = DXX(PHI);
@@ -372,7 +377,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
const ELEM_TYPE d2phi[3][3] = {
{ dxx_phi, ZERO, dxz_phi },
- { ZERO, AXIS_VAL(dxx_phi, dphi[0] / x), ZERO },
+ { ZERO, AXIS_VAL(dxx_phi, dphi[0] * xinv), ZERO },
{ dxz_phi, ZERO, dzz_phi },
};
@@ -408,7 +413,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
const ELEM_TYPE dz_betaz = DZ(BETAZ);
const ELEM_TYPE dbeta[3][3] = {{ dx_betax, ZERO, dx_betaz },
- { ZERO, AXIS_VAL(dx_betax, betax / x), ZERO },
+ { ZERO, AXIS_VAL(dx_betax, betax * xinv), ZERO },
{ dz_betax, ZERO, dz_betaz }};
const ELEM_TYPE dxx_betax = DXX(BETAX);
@@ -429,30 +434,31 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
const ELEM_TYPE d2beta[3][3][3] = {
{
{ dxx_betax, ZERO, dxx_betaz },
- { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO },
+ { ZERO, AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO },
{ dxz_betax, ZERO, dxz_betaz },
},
{
- { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO },
- { AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO, AXIS_VAL(dxx_betaz, dx_betaz / x) },
- { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO },
+ { ZERO, AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO },
+ { AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO, AXIS_VAL(dxx_betaz, dx_betaz * xinv) },
+ { ZERO, AXIS_VAL(dxz_betax, dz_betax * xinv), ZERO },
},
{
{ dxz_betax, ZERO, dxz_betaz },
- { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO },
+ { ZERO, AXIS_VAL(dxz_betax, dz_betax * xinv), ZERO },
{ dzz_betax, ZERO, dzz_betaz },
},
};
- const ELEM_TYPE det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz);
+ const ELEM_TYPE det = gtxx * gtyy * gtzz + TWO * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz);
+ const ELEM_TYPE det_inv = ONE / det;
// \tilde{γ}^{ij}
- gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det;
- gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det;
- gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det;
- gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det;
- gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det;
- gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det;
+ gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) * det_inv;
+ gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) * det_inv;
+ gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) * det_inv;
+ gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) * det_inv;
+ gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) * det_inv;
+ gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) * det_inv;
gtu[1][0] = gtu[0][1];
gtu[2][0] = gtu[0][2];
gtu[2][1] = gtu[1][2];
@@ -461,7 +467,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
gu[j][k] = SQR(phi) * gtu[j][k];
- g[j][k] = gt[j][k] / SQR(phi);
+ g[j][k] = gt[j][k] * SQR(phi_inv);
}
// β_j
@@ -477,8 +483,8 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
- dg[j][k][l] = -TWO * dphi[j] * gt[k][l] / (phi * SQR(phi)) + dgt[j][k][l] / SQR(phi);
- dK[j][k][l] = -TWO * dphi[j] * At[k][l] / (phi * SQR(phi)) + dAt[j][k][l] / SQR(phi) +
+ dg[j][k][l] = -TWO * dphi[j] * gt[k][l] * phi3_inv + dgt[j][k][l] * phi2_inv;
+ dK[j][k][l] = -TWO * dphi[j] * At[k][l] * phi3_inv + dAt[j][k][l] * phi2_inv +
THIRD * (dtrK[j] * g[k][l] + trK * dg[j][k][l]);
}
@@ -487,11 +493,11 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++)
for (int m = 0; m < 3; m++) {
- d2g[j][k][l][m] = SIZ * gt [l][m] * dphi[j] * dphi[k] / SQR(SQR(phi)) -
- TWO * gt [l][m] * d2phi[j][k] / (phi * SQR(phi)) -
- TWO * dgt [j][l][m] * dphi[k] / (phi * SQR(phi)) -
- TWO * dgt [k][l][m] * dphi[j] / (phi * SQR(phi)) +
- d2gt[j][k][l][m] / SQR(phi);
+ d2g[j][k][l][m] = SIX * gt [l][m] * dphi[j] * dphi[k] * SQR(phi2_inv) -
+ TWO * gt [l][m] * d2phi[j][k] * phi3_inv -
+ TWO * dgt [j][l][m] * dphi[k] * phi3_inv -
+ TWO * dgt [k][l][m] * dphi[j] * phi3_inv +
+ d2gt[j][k][l][m] * phi2_inv;
}
// ∂_j γ^{kl}
@@ -560,7 +566,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
// K_{ij}
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
- K[j][k] = At[j][k] / SQR(phi) + THIRD * g[j][k] * trK;
+ K[j][k] = At[j][k] * phi2_inv + THIRD * g[j][k] * trK;
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
@@ -718,7 +724,7 @@ FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solv
STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data, idx_dc, gu[0][0] + AXIS_VAL(gu[1][1], ZERO));
STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data, idx_dc, gu[2][2]);
STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data, idx_dc, TWO * gu[0][2]);
- STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data, idx_dc, -X[0] + AXIS_VAL(ZERO, gu[1][1] / x));
+ STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data, idx_dc, -X[0] + AXIS_VAL(ZERO, gu[1][1] * xinv));
STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data, idx_dc, -X[2]);
STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data, idx_dc, -k2);