From ff2c324b778836e2b1ec44ec9cfeca3b73f5b942 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 27 Aug 2022 17:15:18 +0200 Subject: fill_eq_coeffs_template: prepare for vectorization Replace doubles with a generic ELEM_TYPE where appropriate. Use LOAD() and STORE() macros to write into arrays. Use named constants instead of floating-point literals. --- src/fill_eq_coeffs_template.c | 525 +++++++++++++++++++++++------------------- 1 file changed, 285 insertions(+), 240 deletions(-) diff --git a/src/fill_eq_coeffs_template.c b/src/fill_eq_coeffs_template.c index 20706f6..c5fe884 100644 --- a/src/fill_eq_coeffs_template.c +++ b/src/fill_eq_coeffs_template.c @@ -1,56 +1,75 @@ -#define LOAD(arr, idx) (arr[idx]) +#define ELEM_TYPE double +#define ELEM_SIZE sizeof(ELEM_TYPE) / sizeof(double) + +#define ELEM_SPLAT(x) (x) -static inline double fd1_4(const double *arr, const ptrdiff_t stride, const double h_inv) +#define LOAD(arr, idx) (arr[idx]) +#define STORE(arr, idx, val) arr[idx] = val + +// while gcc supports multiplying vectors by scalars, icc does not, +// which forces us to do this +#define ZERO ELEM_SPLAT(0.0) +#define ONE ELEM_SPLAT(1.0) +#define TWO ELEM_SPLAT(2.0) +#define SIX ELEM_SPLAT(6.0) +#define EIGHT ELEM_SPLAT(8.0) +#define C16 ELEM_SPLAT(16.0) +#define C30 ELEM_SPLAT(30.0) + +#define HALF ELEM_SPLAT(0.5) +#define THIRD ELEM_SPLAT(1.0 / 3.0) +#define C1_12 ELEM_SPLAT(1. / 12.0) +#define C1_144 ELEM_SPLAT(1.0 / 144.0) + +static inline ELEM_TYPE fd1_4(const double *arr, const ptrdiff_t stride, const ELEM_TYPE h_inv) { - const double ap2 = LOAD(arr, 2 * stride); - const double ap1 = LOAD(arr, 1 * stride); - const double am1 = LOAD(arr, -1 * stride); - const double am2 = LOAD(arr, -2 * stride); - return (-ap2 + 8.0 * ap1 - 8.0 * am1 + am2) * h_inv / 12.0; + const ELEM_TYPE ap2 = LOAD(arr, 2 * stride); + const ELEM_TYPE ap1 = LOAD(arr, 1 * stride); + const ELEM_TYPE am1 = LOAD(arr, -1 * stride); + const ELEM_TYPE am2 = LOAD(arr, -2 * stride); + return (-ap2 + EIGHT * ap1 - EIGHT * am1 + am2) * h_inv * C1_12; } -static inline double fd2_4(const double *arr, const ptrdiff_t stride, const double h_inv) +static inline ELEM_TYPE fd2_4(const double *arr, const ptrdiff_t stride, const ELEM_TYPE h_inv) { - const double ap2 = LOAD(arr, 2 * stride); - const double ap1 = LOAD(arr, 1 * stride); - const double a0 = LOAD(arr, 0 * stride); - const double am1 = LOAD(arr, -1 * stride); - const double am2 = LOAD(arr, -2 * stride); - return (-ap2 + 16.0 * ap1 - 30.0 * a0 + 16.0 * am1 - am2) * SQR(h_inv) / 12.0; + const ELEM_TYPE ap2 = LOAD(arr, 2 * stride); + const ELEM_TYPE ap1 = LOAD(arr, 1 * stride); + const ELEM_TYPE a0 = LOAD(arr, 0 * stride); + const ELEM_TYPE am1 = LOAD(arr, -1 * stride); + const ELEM_TYPE am2 = LOAD(arr, -2 * stride); + return (-ap2 + C16 * ap1 - C30 * a0 + C16 * am1 - am2) * SQR(h_inv) * C1_12; } -static inline double fd11_4(const double *arr, ptrdiff_t stride_x, ptrdiff_t stride_z, - double dx_inv, double dz_inv) +static inline ELEM_TYPE fd11_4(const double *arr, ptrdiff_t stride_x, ptrdiff_t stride_z, + ELEM_TYPE dx_inv, ELEM_TYPE dz_inv) { - const double ap2p2 = LOAD(arr, 2 * stride_x + 2 * stride_z); - const double ap2p1 = LOAD(arr, 2 * stride_x + 1 * stride_z); - const double ap2m1 = LOAD(arr, 2 * stride_x - 1 * stride_z); - const double ap2m2 = LOAD(arr, 2 * stride_x - 2 * stride_z); - - const double ap1p2 = LOAD(arr, 1 * stride_x + 2 * stride_z); - const double ap1p1 = LOAD(arr, 1 * stride_x + 1 * stride_z); - const double ap1m1 = LOAD(arr, 1 * stride_x - 1 * stride_z); - const double ap1m2 = LOAD(arr, 1 * stride_x - 2 * stride_z); - - const double am1p2 = LOAD(arr, -1 * stride_x + 2 * stride_z); - const double am1p1 = LOAD(arr, -1 * stride_x + 1 * stride_z); - const double am1m1 = LOAD(arr, -1 * stride_x - 1 * stride_z); - const double am1m2 = LOAD(arr, -1 * stride_x - 2 * stride_z); - - const double am2p2 = LOAD(arr, -2 * stride_x + 2 * stride_z); - const double am2p1 = LOAD(arr, -2 * stride_x + 1 * stride_z); - const double am2m1 = LOAD(arr, -2 * stride_x - 1 * stride_z); - const double am2m2 = LOAD(arr, -2 * stride_x - 2 * stride_z); - - return (-1.0 * (-1.0 * ap2p2 + 8.0 * ap1p2 - 8.0 * am1p2 + 1.0 * am2p2) - +8.0 * (-1.0 * ap2p1 + 8.0 * ap1p1 - 8.0 * am1p1 + 1.0 * am2p1) - -8.0 * (-1.0 * ap2m1 + 8.0 * ap1m1 - 8.0 * am1m1 + 1.0 * am2m1) - +1.0 * (-1.0 * ap2m2 + 8.0 * ap1m2 - 8.0 * am1m2 + 1.0 * am2m2)) * dx_inv * dz_inv / 144.0; + const ELEM_TYPE ap2p2 = LOAD(arr, 2 * stride_x + 2 * stride_z); + const ELEM_TYPE ap2p1 = LOAD(arr, 2 * stride_x + 1 * stride_z); + const ELEM_TYPE ap2m1 = LOAD(arr, 2 * stride_x - 1 * stride_z); + const ELEM_TYPE ap2m2 = LOAD(arr, 2 * stride_x - 2 * stride_z); + + const ELEM_TYPE ap1p2 = LOAD(arr, 1 * stride_x + 2 * stride_z); + const ELEM_TYPE ap1p1 = LOAD(arr, 1 * stride_x + 1 * stride_z); + const ELEM_TYPE ap1m1 = LOAD(arr, 1 * stride_x - 1 * stride_z); + const ELEM_TYPE ap1m2 = LOAD(arr, 1 * stride_x - 2 * stride_z); + + const ELEM_TYPE am1p2 = LOAD(arr, -1 * stride_x + 2 * stride_z); + const ELEM_TYPE am1p1 = LOAD(arr, -1 * stride_x + 1 * stride_z); + const ELEM_TYPE am1m1 = LOAD(arr, -1 * stride_x - 1 * stride_z); + const ELEM_TYPE am1m2 = LOAD(arr, -1 * stride_x - 2 * stride_z); + + const ELEM_TYPE am2p2 = LOAD(arr, -2 * stride_x + 2 * stride_z); + const ELEM_TYPE am2p1 = LOAD(arr, -2 * stride_x + 1 * stride_z); + const ELEM_TYPE am2m1 = LOAD(arr, -2 * stride_x - 1 * stride_z); + const ELEM_TYPE am2m2 = LOAD(arr, -2 * stride_x - 2 * stride_z); + + return (-ONE * (-ap2p2 + EIGHT * ap1p2 - EIGHT * am1p2 + am2p2) + +EIGHT * (-ap2p1 + EIGHT * ap1p1 - EIGHT * am1p1 + am2p1) + -EIGHT * (-ap2m1 + EIGHT * ap1m1 - EIGHT * am1m1 + am2m1) + +ONE * (-ap2m2 + EIGHT * ap1m2 - EIGHT * am1m2 + am2m2)) * dx_inv * dz_inv * C1_144; } -#undef LOAD - #if 0 #define FD8(arr, stride, h_inv) \ ((-3.0 * arr[4 * stride] + 32.0 * arr[3 * stride] - 168.0 * arr[2 * stride] + 672.0 * arr[1 * stride] \ @@ -108,8 +127,11 @@ static double diff8_dxz(const double *arr, ptrdiff_t stride, double dx_inv, doub static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext *solver, const double * const vars[], const int idx_z, - const ptrdiff_t stride_z, const double dx_inv, const double dz_inv) + const ptrdiff_t stride_z, const double dx_inv_scal, const double dz_inv_scal) { + const ELEM_TYPE dx_inv = ELEM_SPLAT(dx_inv_scal); + const ELEM_TYPE dz_inv = ELEM_SPLAT(dz_inv_scal); + for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); const int idx_dc = idx_z * solver->diff_coeffs[0]->stride + idx_x; @@ -118,237 +140,240 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext const double x = vars[RHS_VAR_X][idx_src]; const int on_axis = fabs(x) < 1e-13; - double K[3][3], Km[3][3], Ku[3][3], dK[3][3][3]; - double gtu[3][3], g[3][3], gu[3][3]; - double dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3]; - double G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3]; - double Ric[3][3], Ricm[3][3]; - double dgdot[3][3][3]; - double D2alpha[3][3]; - double Kdot[3][3]; - double k2, Kij_Dij_alpha, k_kdot, k3; - double Gammadot_term, beta_term; +#define AXIS_VAL(axis, normal) (on_axis ? (axis) : (normal)) + + ELEM_TYPE K[3][3], Km[3][3], Ku[3][3], dK[3][3][3]; + ELEM_TYPE gtu[3][3], g[3][3], gu[3][3]; + ELEM_TYPE dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3]; + ELEM_TYPE G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3]; + ELEM_TYPE Ric[3][3], Ricm[3][3]; + ELEM_TYPE dgdot[3][3][3]; + ELEM_TYPE D2alpha[3][3]; + ELEM_TYPE Kdot[3][3]; + ELEM_TYPE k2, Kij_Dij_alpha, k_kdot, k3; + ELEM_TYPE Gammadot_term, beta_term; - double betal[3], dbetal[3][3], d2betal[3][3][3]; + ELEM_TYPE betal[3], dbetal[3][3], d2betal[3][3][3]; -#define LOAD_VAR(var) ( vars[RHS_VAR_ ## var][idx_src]) +#define LOAD_VAR(var) LOAD ( vars[RHS_VAR_ ## var], idx_src) #define DX(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv )) #define DZ(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) #define DXX(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv)) #define DZZ(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) #define DXZ(var) (diff11(&vars[RHS_VAR_ ## var][idx_src], 1, stride_z, dx_inv, dz_inv)) - const double gtxx = LOAD_VAR(GTXX); - const double gtyy = LOAD_VAR(GTYY); - const double gtzz = LOAD_VAR(GTZZ); - const double gtxy = LOAD_VAR(GTXY); - const double gtxz = LOAD_VAR(GTXZ); - const double gtyz = LOAD_VAR(GTYZ); + const ELEM_TYPE gtxx = LOAD_VAR(GTXX); + const ELEM_TYPE gtyy = LOAD_VAR(GTYY); + const ELEM_TYPE gtzz = LOAD_VAR(GTZZ); + const ELEM_TYPE gtxy = LOAD_VAR(GTXY); + const ELEM_TYPE gtxz = LOAD_VAR(GTXZ); + const ELEM_TYPE gtyz = LOAD_VAR(GTYZ); - const double gt[3][3] = {{ gtxx, gtxy, gtxz }, - { gtxy, gtyy, gtyz }, - { gtxz, gtyz, gtzz }}; + const ELEM_TYPE gt[3][3] = {{ gtxx, gtxy, gtxz }, + { gtxy, gtyy, gtyz }, + { gtxz, gtyz, gtzz }}; - const double dx_gt11 = DX(GTXX); - const double dx_gt22 = DX(GTYY); - const double dx_gt33 = DX(GTZZ); - const double dx_gt13 = DX(GTXZ); + const ELEM_TYPE dx_gt11 = DX(GTXX); + const ELEM_TYPE dx_gt22 = DX(GTYY); + const ELEM_TYPE dx_gt33 = DX(GTZZ); + const ELEM_TYPE dx_gt13 = DX(GTXZ); - const double dz_gt11 = DZ(GTXX); - const double dz_gt22 = DZ(GTYY); - const double dz_gt33 = DZ(GTZZ); - const double dz_gt13 = DZ(GTXZ); + const ELEM_TYPE dz_gt11 = DZ(GTXX); + const ELEM_TYPE dz_gt22 = DZ(GTYY); + const ELEM_TYPE dz_gt33 = DZ(GTZZ); + const ELEM_TYPE dz_gt13 = DZ(GTXZ); - const double dgt[3][3][3] = { + const ELEM_TYPE dgt[3][3][3] = { { - { dx_gt11, 0.0, dx_gt13 }, - { 0.0, dx_gt22, 0.0 }, - { dx_gt13, 0.0, dx_gt33 }, + { dx_gt11, ZERO, dx_gt13 }, + { ZERO, dx_gt22, ZERO }, + { dx_gt13, ZERO, dx_gt33 }, }, { - { 0.0, on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0 }, - { on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0, on_axis ? dx_gt13 : gtxz / x }, - { 0.0, on_axis ? dx_gt13 : gtxz / x, 0.0 }, + { ZERO, AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO }, + { AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO, AXIS_VAL(dx_gt13, gtxz / x) }, + { ZERO, AXIS_VAL(dx_gt13, gtxz / x), ZERO }, }, { - { dz_gt11, 0.0, dz_gt13 }, - { 0.0, dz_gt22, 0.0 }, - { dz_gt13, 0.0, dz_gt33 }, + { dz_gt11, ZERO, dz_gt13 }, + { ZERO, dz_gt22, ZERO }, + { dz_gt13, ZERO, dz_gt33 }, }, }; - const double dxx_gt11 = DXX(GTXX); - const double dxx_gt22 = DXX(GTYY); - const double dxx_gt33 = DXX(GTZZ); - const double dxx_gt13 = DXX(GTXZ); + const ELEM_TYPE dxx_gt11 = DXX(GTXX); + const ELEM_TYPE dxx_gt22 = DXX(GTYY); + const ELEM_TYPE dxx_gt33 = DXX(GTZZ); + const ELEM_TYPE dxx_gt13 = DXX(GTXZ); - const double dxz_gt11 = DXZ(GTXX); - const double dxz_gt22 = DXZ(GTYY); - const double dxz_gt33 = DXZ(GTZZ); - const double dxz_gt13 = DXZ(GTXZ); + const ELEM_TYPE dxz_gt11 = DXZ(GTXX); + const ELEM_TYPE dxz_gt22 = DXZ(GTYY); + const ELEM_TYPE dxz_gt33 = DXZ(GTZZ); + const ELEM_TYPE dxz_gt13 = DXZ(GTXZ); - const double dzz_gt11 = DZZ(GTXX); - const double dzz_gt22 = DZZ(GTYY); - const double dzz_gt33 = DZZ(GTZZ); - const double dzz_gt13 = DZZ(GTXZ); + const ELEM_TYPE dzz_gt11 = DZZ(GTXX); + const ELEM_TYPE dzz_gt22 = DZZ(GTYY); + const ELEM_TYPE dzz_gt33 = DZZ(GTZZ); + const ELEM_TYPE dzz_gt13 = DZZ(GTXZ); - const double d2gt[3][3][3][3] = { + const ELEM_TYPE d2gt[3][3][3][3] = { { { - { dxx_gt11, 0.0, dxx_gt13 }, - { 0.0, dxx_gt22, 0.0 }, - { dxx_gt13, 0.0, dxx_gt33 }, + { dxx_gt11, ZERO, dxx_gt13 }, + { ZERO, dxx_gt22, ZERO }, + { dxx_gt13, ZERO, dxx_gt33 }, }, { - { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, - { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, - on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, - { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, + { ZERO, AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO }, + { AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO, + AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) }, + { ZERO, AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO }, }, { - { dxz_gt11, 0.0, dxz_gt13 }, - { 0.0, dxz_gt22, 0.0 }, - { dxz_gt13, 0.0, dxz_gt33 }, + { dxz_gt11, ZERO, dxz_gt13 }, + { ZERO, dxz_gt22, ZERO }, + { dxz_gt13, ZERO, dxz_gt33 }, }, }, { { - { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, - { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, - on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, - { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, + { ZERO, AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO }, + { AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO, + AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) }, + { ZERO, AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO }, }, { - { on_axis ? dxx_gt22 : dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x), 0.0, - on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, - { 0.0, on_axis ? dxx_gt11 : dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x), 0.0 }, - { on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0, on_axis ? dxx_gt33 : dx_gt33 / x }, + { AXIS_VAL(dxx_gt22, dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x)), ZERO, + AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) }, + { ZERO, AXIS_VAL(dxx_gt11, dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x)), ZERO }, + { AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO, + AXIS_VAL(dxx_gt33, dx_gt33 / x) }, }, { - { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, - { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, - on_axis ? dxz_gt13 : dz_gt13 / x }, - { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, + { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO }, + { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO, + AXIS_VAL(dxz_gt13, dz_gt13 / x) }, + { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO }, }, }, { { - { dxz_gt11, 0.0, dxz_gt13 }, - { 0.0, dxz_gt22, 0.0 }, - { dxz_gt13, 0.0, dxz_gt33 }, + { dxz_gt11, ZERO, dxz_gt13 }, + { ZERO, dxz_gt22, ZERO }, + { dxz_gt13, ZERO, dxz_gt33 }, }, { - { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, - { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, - on_axis ? dxz_gt13 : dz_gt13 / x }, - { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, + { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO }, + { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO, + AXIS_VAL(dxz_gt13, dz_gt13 / x) }, + { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO }, }, { - { dzz_gt11, 0.0, dzz_gt13 }, - { 0.0, dzz_gt22, 0.0 }, - { dzz_gt13, 0.0, dzz_gt33 }, + { dzz_gt11, ZERO, dzz_gt13 }, + { ZERO, dzz_gt22, ZERO }, + { dzz_gt13, ZERO, dzz_gt33 }, }, }, }; - const double Atxx = LOAD_VAR(ATXX); - const double Atyy = LOAD_VAR(ATYY); - const double Atzz = LOAD_VAR(ATZZ); - const double Atxy = LOAD_VAR(ATXY); - const double Atxz = LOAD_VAR(ATXZ); - const double Atyz = LOAD_VAR(ATYZ); + const ELEM_TYPE Atxx = LOAD_VAR(ATXX); + const ELEM_TYPE Atyy = LOAD_VAR(ATYY); + const ELEM_TYPE Atzz = LOAD_VAR(ATZZ); + const ELEM_TYPE Atxy = LOAD_VAR(ATXY); + const ELEM_TYPE Atxz = LOAD_VAR(ATXZ); + const ELEM_TYPE Atyz = LOAD_VAR(ATYZ); - const double At[3][3] = { + const ELEM_TYPE At[3][3] = { { Atxx, Atxy, Atxz }, { Atxy, Atyy, Atyz }, { Atxz, Atyz, Atzz }}; - const double dx_At11 = DX(ATXX); - const double dx_At22 = DX(ATYY); - const double dx_At33 = DX(ATZZ); - const double dx_At13 = DX(ATXZ); + const ELEM_TYPE dx_At11 = DX(ATXX); + const ELEM_TYPE dx_At22 = DX(ATYY); + const ELEM_TYPE dx_At33 = DX(ATZZ); + const ELEM_TYPE dx_At13 = DX(ATXZ); - const double dz_At11 = DZ(ATXX); - const double dz_At22 = DZ(ATYY); - const double dz_At33 = DZ(ATZZ); - const double dz_At13 = DZ(ATXZ); + const ELEM_TYPE dz_At11 = DZ(ATXX); + const ELEM_TYPE dz_At22 = DZ(ATYY); + const ELEM_TYPE dz_At33 = DZ(ATZZ); + const ELEM_TYPE dz_At13 = DZ(ATXZ); - const double dAt[3][3][3] = { + const ELEM_TYPE dAt[3][3][3] = { { - { dx_At11, 0.0, dx_At13 }, - { 0.0, dx_At22, 0.0 }, - { dx_At13, 0.0, dx_At33 }, + { dx_At11, ZERO, dx_At13 }, + { ZERO, dx_At22, ZERO }, + { dx_At13, ZERO, dx_At33 }, }, { - { 0.0, on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0 }, - { on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0, on_axis ? dx_At13 : Atxz / x }, - { 0.0, on_axis ? dx_At13 : Atxz / x, 0.0 }, + { ZERO, AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO }, + { AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO, AXIS_VAL(dx_At13, Atxz / x) }, + { ZERO, AXIS_VAL(dx_At13, Atxz / x), ZERO }, }, { - { dz_At11, 0.0, dz_At13 }, - { 0.0, dz_At22, 0.0 }, - { dz_At13, 0.0, dz_At33 }, + { dz_At11, ZERO, dz_At13 }, + { ZERO, dz_At22, ZERO }, + { dz_At13, ZERO, dz_At33 }, }, }; - const double phi = LOAD_VAR(PHI); - const double dphi[3] = { DX(PHI), 0.0, DZ(PHI) }; + const ELEM_TYPE phi = LOAD_VAR(PHI); + const ELEM_TYPE dphi[3] = { DX(PHI), ZERO, DZ(PHI) }; - const double dxx_phi = DXX(PHI); - const double dzz_phi = DZZ(PHI); - const double dxz_phi = DXZ(PHI); + const ELEM_TYPE dxx_phi = DXX(PHI); + const ELEM_TYPE dzz_phi = DZZ(PHI); + const ELEM_TYPE dxz_phi = DXZ(PHI); - const double d2phi[3][3] = { - { dxx_phi, 0.0, dxz_phi }, - { 0.0, on_axis ? dxx_phi : dphi[0] / x, 0.0 }, - { dxz_phi, 0.0, dzz_phi }, + const ELEM_TYPE d2phi[3][3] = { + { dxx_phi, ZERO, dxz_phi }, + { ZERO, AXIS_VAL(dxx_phi, dphi[0] / x), ZERO }, + { dxz_phi, ZERO, dzz_phi }, }; - const double trK = LOAD_VAR(TRK); - const double dtrK[3] = { DX(TRK), 0.0, DZ(TRK) }; + const ELEM_TYPE trK = LOAD_VAR(TRK); + const ELEM_TYPE dtrK[3] = { DX(TRK), ZERO, DZ(TRK) }; - const double Xtx = LOAD_VAR(XTX); - const double Xtz = LOAD_VAR(XTZ); + const ELEM_TYPE Xtx = LOAD_VAR(XTX); + const ELEM_TYPE Xtz = LOAD_VAR(XTZ); - const double alpha = LOAD_VAR(ALPHA); - const double dx_alpha = DX(ALPHA); - const double dz_alpha = DZ(ALPHA); + const ELEM_TYPE alpha = LOAD_VAR(ALPHA); + const ELEM_TYPE dx_alpha = DX(ALPHA); + const ELEM_TYPE dz_alpha = DZ(ALPHA); - const double dalpha[3] = { dx_alpha, 0.0, dz_alpha }; + const ELEM_TYPE dalpha[3] = { dx_alpha, ZERO, dz_alpha }; - const double dxx_alpha = DXX(ALPHA); - const double dzz_alpha = DZZ(ALPHA); - const double dxz_alpha = DXZ(ALPHA); + const ELEM_TYPE dxx_alpha = DXX(ALPHA); + const ELEM_TYPE dzz_alpha = DZZ(ALPHA); + const ELEM_TYPE dxz_alpha = DXZ(ALPHA); - const double d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha }, - { 0, on_axis ? dxx_alpha : dx_alpha / x, 0 }, - { dxz_alpha, 0, dzz_alpha }}; + const ELEM_TYPE d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha }, + { 0, AXIS_VAL(dxx_alpha, dx_alpha / x), 0 }, + { dxz_alpha, 0, dzz_alpha }}; - const double betax = LOAD_VAR(BETAX); - const double betaz = LOAD_VAR(BETAX); + const ELEM_TYPE betax = LOAD_VAR(BETAX); + const ELEM_TYPE betaz = LOAD_VAR(BETAX); - const double beta[3] = { betax, 0.0, betaz }; + const ELEM_TYPE beta[3] = { betax, ZERO, betaz }; - const double dx_betax = DX(BETAX); - const double dz_betax = DZ(BETAX); + const ELEM_TYPE dx_betax = DX(BETAX); + const ELEM_TYPE dz_betax = DZ(BETAX); - const double dx_betaz = DX(BETAZ); - const double dz_betaz = DZ(BETAZ); + const ELEM_TYPE dx_betaz = DX(BETAZ); + const ELEM_TYPE dz_betaz = DZ(BETAZ); - const double dbeta[3][3] = {{ dx_betax, 0.0, dx_betaz }, - { 0.0, on_axis ? dx_betax : betax / x, 0.0 }, - { dz_betax, 0.0, dz_betaz }}; + const ELEM_TYPE dbeta[3][3] = {{ dx_betax, ZERO, dx_betaz }, + { ZERO, AXIS_VAL(dx_betax, betax / x), ZERO }, + { dz_betax, ZERO, dz_betaz }}; - const double dxx_betax = DXX(BETAX); - const double dxz_betax = DXZ(BETAX); - const double dzz_betax = DZZ(BETAX); + const ELEM_TYPE dxx_betax = DXX(BETAX); + const ELEM_TYPE dxz_betax = DXZ(BETAX); + const ELEM_TYPE dzz_betax = DZZ(BETAX); - const double dxx_betaz = DXX(BETAZ); - const double dxz_betaz = DXZ(BETAZ); - const double dzz_betaz = DZZ(BETAZ); + const ELEM_TYPE dxx_betaz = DXX(BETAZ); + const ELEM_TYPE dxz_betaz = DXZ(BETAZ); + const ELEM_TYPE dzz_betaz = DZZ(BETAZ); #undef LOAD_VAR #undef DX @@ -357,25 +382,25 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext #undef DZZ #undef DXZ - const double d2beta[3][3][3] = { + const ELEM_TYPE d2beta[3][3][3] = { { - { dxx_betax, 0.0, dxx_betaz }, - { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, - { dxz_betax, 0.0, dxz_betaz }, + { dxx_betax, ZERO, dxx_betaz }, + { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO }, + { dxz_betax, ZERO, dxz_betaz }, }, { - { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, - { on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0, on_axis ? dxx_betaz : dx_betaz / x }, - { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, + { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO }, + { AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO, AXIS_VAL(dxx_betaz, dx_betaz / x) }, + { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO }, }, { - { dxz_betax, 0.0, dxz_betaz }, - { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, - { dzz_betax, 0.0, dzz_betaz }, + { dxz_betax, ZERO, dxz_betaz }, + { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO }, + { dzz_betax, ZERO, dzz_betaz }, }, }; - const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); + const ELEM_TYPE det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); // \tilde{γ}^{ij} gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det; @@ -397,7 +422,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext // β_j for (int j = 0; j < 3; j++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int k = 0; k < 3; k++) val += g[j][k] * beta[k]; betal[j] = val; @@ -429,7 +454,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) for (int n = 0; n < 3; n++) val += -gu[k][m] * gu[l][n] * dg[j][m][n]; @@ -440,9 +465,9 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) - val += 0.5 * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]); + val += HALF * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]); G[j][k][l] = val; } @@ -451,17 +476,17 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) for (int m = 0; m < 3; m++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int n = 0; n < 3; n++) { val += dgu[j][k][n] * (dg [l][m][n] + dg [m][l][n] - dg [n][l][m]) + gu [k][n] * (d2g[j][l][m][n] + d2g[j][m][l][n] - d2g[j][n][l][m]); } - dG[j][k][l][m] = 0.5 * val; + dG[j][k][l][m] = HALF * val; } // Γ^j = γ^{kl} Γ_{kl}^j for (int j = 0; j < 3; j++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) val += gu[k][l] * G[j][k][l]; @@ -471,7 +496,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext // ∂_j β_k for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += g[k][l] * dbeta[j][l] + dg[j][k][l] * beta[l]; dbetal[j][k] = val; @@ -481,7 +506,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) val += d2g[j][k][l][m] * beta[m] + dg[k][l][m] * dbeta[j][m] + dg[j][l][m] * dbeta[k][m] + g[l][m] * d2beta[j][k][m]; @@ -495,7 +520,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += gu[j][l] * K[l][k]; Km[j][k] = val; @@ -504,7 +529,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext // K^{ij} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += gu[j][l] * Km[k][l]; Ku[j][k] = val; @@ -514,16 +539,16 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { - dgdot[j][k][l] = -2.0 * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) + + dgdot[j][k][l] = -TWO * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) + d2betal[j][k][l] + d2betal[j][l][k]; for (int m = 0; m < 3; m++) - dgdot[j][k][l] += -2.0 * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]); + dgdot[j][k][l] += -TWO * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]); } // \dot{γ^{jk}} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - gudot[j][k] = 2.0 * alpha * Ku[j][k]; + gudot[j][k] = TWO * alpha * Ku[j][k]; for (int l = 0; l < 3; l++) { gudot[j][k] += -gu[j][l] * dbeta[l][k] - gu[k][l] * dbeta[l][j]; @@ -538,17 +563,17 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) val += gudot[j][m] * (dg [k][l][m] + dg [l][k][m] - dg [m][k][l]) + gu [j][m] * (dgdot[k][l][m] + dgdot[l][k][m] - dgdot[m][k][l]); - Gdot[j][k][l] = 0.5 * val; + Gdot[j][k][l] = HALF * val; } - Gammadot_term = 0.0; + Gammadot_term = ZERO; for (int j = 0; j < 3; j++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { val += gu[k][l] * Gdot[j][k][l]; @@ -559,30 +584,30 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += G[l][j][k] * dalpha[l]; D2alpha[j][k] = d2alpha[j][k] - val; } - Kij_Dij_alpha = 0.0; + Kij_Dij_alpha = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { Kij_Dij_alpha += Ku[j][k] * D2alpha[j][k]; } - k3 = 0.0; + k3 = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += Km[k][l] * Km[l][j]; k3 += val * Km[j][k]; } // K_{ij} K^{ij} - k2 = 0.0; + k2 = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) k2 += Km[j][k] * Km[k][j]; @@ -590,7 +615,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext // Ric_{jk} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) val += dG[m][m][j][k] - dG[k][m][j][m]; for (int m = 0; m < 3; m++) @@ -602,7 +627,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext // Ric^j_k for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += gu[j][l] * Ric[l][k]; Ricm[j][k] = val; @@ -610,14 +635,14 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { - double val = 0.0; + ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += K[j][l] * Km[l][k]; - Kdot[j][k] = -D2alpha[j][k] + alpha * (Ric[j][k] + trK * K[j][k] - 2 * val); + Kdot[j][k] = -D2alpha[j][k] + alpha * (Ric[j][k] + trK * K[j][k] - TWO * val); } - k_kdot = 0.0; + k_kdot = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { //k_kdot += kdot[j][k] * Ku[j][k]; @@ -625,11 +650,11 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext } - beta_term = 0.0; + beta_term = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { - double D2beta = 0.0; + ELEM_TYPE D2beta = ZERO; D2beta += d2beta[j][k][l]; @@ -646,15 +671,15 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext for (int k = 0; k < 3; k++) beta_term += -beta[k] * Ricm[j][k] * dalpha[j]; - solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_dc] = gu[0][0] + (on_axis ? gu[1][1] : 0.0); - solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_dc] = gu[2][2]; - solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_dc] = 2.0 * gu[0][2]; - solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_dc] = -X[0] + (on_axis ? 0.0 : gu[1][1] / x); - solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_dc] = -X[2]; - solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_dc] = -k2; + STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data, idx_dc, gu[0][0] + AXIS_VAL(gu[1][1], ZERO)); + STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data, idx_dc, gu[2][2]); + STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data, idx_dc, 2.0 * gu[0][2]); + STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data, idx_dc, -X[0] + AXIS_VAL(ZERO, gu[1][1] / x)); + STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data, idx_dc, -X[2]); + STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data, idx_dc, -k2); - solver->rhs[idx_rhs] = -2.0 * alpha * Kij_Dij_alpha + Gammadot_term + - 2.0 * alpha * (k_kdot + 2.0 * alpha * k3) + beta_term; + STORE(solver->rhs, idx_rhs, -TWO * alpha * Kij_Dij_alpha + Gammadot_term + + TWO * alpha * (k_kdot + TWO * alpha * k3) + beta_term); if (on_axis) { solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; @@ -662,3 +687,23 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext } } } + +#undef LOAD +#undef STORE + +#undef ZERO +#undef ONE +#undef TWO +#undef SIX +#undef EIGHT +#undef TWELVE +#undef C16 +#undef C30 + +#undef HALF +#undef THIRD +#undef C1_144 + +#undef ELEM_SPLAT +#undef ELEM_SIZE +#undef ELEM_TYPE -- cgit v1.2.3