summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2022-08-27 17:15:18 +0200
committerAnton Khirnov <anton@khirnov.net>2022-09-06 11:51:32 +0200
commitff2c324b778836e2b1ec44ec9cfeca3b73f5b942 (patch)
tree03cee1b9957c64acb8cf9d6ee157d482376c5053
parent7488dbe4b6da44e8ead17e95b57025352d6bef8a (diff)
fill_eq_coeffs_template: prepare for vectorization
Replace doubles with a generic ELEM_TYPE where appropriate. Use LOAD() and STORE() macros to write into arrays. Use named constants instead of floating-point literals.
-rw-r--r--src/fill_eq_coeffs_template.c525
1 files changed, 285 insertions, 240 deletions
diff --git a/src/fill_eq_coeffs_template.c b/src/fill_eq_coeffs_template.c
index 20706f6..c5fe884 100644
--- a/src/fill_eq_coeffs_template.c
+++ b/src/fill_eq_coeffs_template.c
@@ -1,56 +1,75 @@
-#define LOAD(arr, idx) (arr[idx])
+#define ELEM_TYPE double
+#define ELEM_SIZE sizeof(ELEM_TYPE) / sizeof(double)
+
+#define ELEM_SPLAT(x) (x)
-static inline double fd1_4(const double *arr, const ptrdiff_t stride, const double h_inv)
+#define LOAD(arr, idx) (arr[idx])
+#define STORE(arr, idx, val) arr[idx] = val
+
+// while gcc supports multiplying vectors by scalars, icc does not,
+// which forces us to do this
+#define ZERO ELEM_SPLAT(0.0)
+#define ONE ELEM_SPLAT(1.0)
+#define TWO ELEM_SPLAT(2.0)
+#define SIX ELEM_SPLAT(6.0)
+#define EIGHT ELEM_SPLAT(8.0)
+#define C16 ELEM_SPLAT(16.0)
+#define C30 ELEM_SPLAT(30.0)
+
+#define HALF ELEM_SPLAT(0.5)
+#define THIRD ELEM_SPLAT(1.0 / 3.0)
+#define C1_12 ELEM_SPLAT(1. / 12.0)
+#define C1_144 ELEM_SPLAT(1.0 / 144.0)
+
+static inline ELEM_TYPE fd1_4(const double *arr, const ptrdiff_t stride, const ELEM_TYPE h_inv)
{
- const double ap2 = LOAD(arr, 2 * stride);
- const double ap1 = LOAD(arr, 1 * stride);
- const double am1 = LOAD(arr, -1 * stride);
- const double am2 = LOAD(arr, -2 * stride);
- return (-ap2 + 8.0 * ap1 - 8.0 * am1 + am2) * h_inv / 12.0;
+ const ELEM_TYPE ap2 = LOAD(arr, 2 * stride);
+ const ELEM_TYPE ap1 = LOAD(arr, 1 * stride);
+ const ELEM_TYPE am1 = LOAD(arr, -1 * stride);
+ const ELEM_TYPE am2 = LOAD(arr, -2 * stride);
+ return (-ap2 + EIGHT * ap1 - EIGHT * am1 + am2) * h_inv * C1_12;
}
-static inline double fd2_4(const double *arr, const ptrdiff_t stride, const double h_inv)
+static inline ELEM_TYPE fd2_4(const double *arr, const ptrdiff_t stride, const ELEM_TYPE h_inv)
{
- const double ap2 = LOAD(arr, 2 * stride);
- const double ap1 = LOAD(arr, 1 * stride);
- const double a0 = LOAD(arr, 0 * stride);
- const double am1 = LOAD(arr, -1 * stride);
- const double am2 = LOAD(arr, -2 * stride);
- return (-ap2 + 16.0 * ap1 - 30.0 * a0 + 16.0 * am1 - am2) * SQR(h_inv) / 12.0;
+ const ELEM_TYPE ap2 = LOAD(arr, 2 * stride);
+ const ELEM_TYPE ap1 = LOAD(arr, 1 * stride);
+ const ELEM_TYPE a0 = LOAD(arr, 0 * stride);
+ const ELEM_TYPE am1 = LOAD(arr, -1 * stride);
+ const ELEM_TYPE am2 = LOAD(arr, -2 * stride);
+ return (-ap2 + C16 * ap1 - C30 * a0 + C16 * am1 - am2) * SQR(h_inv) * C1_12;
}
-static inline double fd11_4(const double *arr, ptrdiff_t stride_x, ptrdiff_t stride_z,
- double dx_inv, double dz_inv)
+static inline ELEM_TYPE fd11_4(const double *arr, ptrdiff_t stride_x, ptrdiff_t stride_z,
+ ELEM_TYPE dx_inv, ELEM_TYPE dz_inv)
{
- const double ap2p2 = LOAD(arr, 2 * stride_x + 2 * stride_z);
- const double ap2p1 = LOAD(arr, 2 * stride_x + 1 * stride_z);
- const double ap2m1 = LOAD(arr, 2 * stride_x - 1 * stride_z);
- const double ap2m2 = LOAD(arr, 2 * stride_x - 2 * stride_z);
-
- const double ap1p2 = LOAD(arr, 1 * stride_x + 2 * stride_z);
- const double ap1p1 = LOAD(arr, 1 * stride_x + 1 * stride_z);
- const double ap1m1 = LOAD(arr, 1 * stride_x - 1 * stride_z);
- const double ap1m2 = LOAD(arr, 1 * stride_x - 2 * stride_z);
-
- const double am1p2 = LOAD(arr, -1 * stride_x + 2 * stride_z);
- const double am1p1 = LOAD(arr, -1 * stride_x + 1 * stride_z);
- const double am1m1 = LOAD(arr, -1 * stride_x - 1 * stride_z);
- const double am1m2 = LOAD(arr, -1 * stride_x - 2 * stride_z);
-
- const double am2p2 = LOAD(arr, -2 * stride_x + 2 * stride_z);
- const double am2p1 = LOAD(arr, -2 * stride_x + 1 * stride_z);
- const double am2m1 = LOAD(arr, -2 * stride_x - 1 * stride_z);
- const double am2m2 = LOAD(arr, -2 * stride_x - 2 * stride_z);
-
- return (-1.0 * (-1.0 * ap2p2 + 8.0 * ap1p2 - 8.0 * am1p2 + 1.0 * am2p2)
- +8.0 * (-1.0 * ap2p1 + 8.0 * ap1p1 - 8.0 * am1p1 + 1.0 * am2p1)
- -8.0 * (-1.0 * ap2m1 + 8.0 * ap1m1 - 8.0 * am1m1 + 1.0 * am2m1)
- +1.0 * (-1.0 * ap2m2 + 8.0 * ap1m2 - 8.0 * am1m2 + 1.0 * am2m2)) * dx_inv * dz_inv / 144.0;
+ const ELEM_TYPE ap2p2 = LOAD(arr, 2 * stride_x + 2 * stride_z);
+ const ELEM_TYPE ap2p1 = LOAD(arr, 2 * stride_x + 1 * stride_z);
+ const ELEM_TYPE ap2m1 = LOAD(arr, 2 * stride_x - 1 * stride_z);
+ const ELEM_TYPE ap2m2 = LOAD(arr, 2 * stride_x - 2 * stride_z);
+
+ const ELEM_TYPE ap1p2 = LOAD(arr, 1 * stride_x + 2 * stride_z);
+ const ELEM_TYPE ap1p1 = LOAD(arr, 1 * stride_x + 1 * stride_z);
+ const ELEM_TYPE ap1m1 = LOAD(arr, 1 * stride_x - 1 * stride_z);
+ const ELEM_TYPE ap1m2 = LOAD(arr, 1 * stride_x - 2 * stride_z);
+
+ const ELEM_TYPE am1p2 = LOAD(arr, -1 * stride_x + 2 * stride_z);
+ const ELEM_TYPE am1p1 = LOAD(arr, -1 * stride_x + 1 * stride_z);
+ const ELEM_TYPE am1m1 = LOAD(arr, -1 * stride_x - 1 * stride_z);
+ const ELEM_TYPE am1m2 = LOAD(arr, -1 * stride_x - 2 * stride_z);
+
+ const ELEM_TYPE am2p2 = LOAD(arr, -2 * stride_x + 2 * stride_z);
+ const ELEM_TYPE am2p1 = LOAD(arr, -2 * stride_x + 1 * stride_z);
+ const ELEM_TYPE am2m1 = LOAD(arr, -2 * stride_x - 1 * stride_z);
+ const ELEM_TYPE am2m2 = LOAD(arr, -2 * stride_x - 2 * stride_z);
+
+ return (-ONE * (-ap2p2 + EIGHT * ap1p2 - EIGHT * am1p2 + am2p2)
+ +EIGHT * (-ap2p1 + EIGHT * ap1p1 - EIGHT * am1p1 + am2p1)
+ -EIGHT * (-ap2m1 + EIGHT * ap1m1 - EIGHT * am1m1 + am2m1)
+ +ONE * (-ap2m2 + EIGHT * ap1m2 - EIGHT * am1m2 + am2m2)) * dx_inv * dz_inv * C1_144;
}
-#undef LOAD
-
#if 0
#define FD8(arr, stride, h_inv) \
((-3.0 * arr[4 * stride] + 32.0 * arr[3 * stride] - 168.0 * arr[2 * stride] + 672.0 * arr[1 * stride] \
@@ -108,8 +127,11 @@ static double diff8_dxz(const double *arr, ptrdiff_t stride, double dx_inv, doub
static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext *solver,
const double * const vars[], const int idx_z,
- const ptrdiff_t stride_z, const double dx_inv, const double dz_inv)
+ const ptrdiff_t stride_z, const double dx_inv_scal, const double dz_inv_scal)
{
+ const ELEM_TYPE dx_inv = ELEM_SPLAT(dx_inv_scal);
+ const ELEM_TYPE dz_inv = ELEM_SPLAT(dz_inv_scal);
+
for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) {
const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]);
const int idx_dc = idx_z * solver->diff_coeffs[0]->stride + idx_x;
@@ -118,237 +140,240 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
const double x = vars[RHS_VAR_X][idx_src];
const int on_axis = fabs(x) < 1e-13;
- double K[3][3], Km[3][3], Ku[3][3], dK[3][3][3];
- double gtu[3][3], g[3][3], gu[3][3];
- double dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3];
- double G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3];
- double Ric[3][3], Ricm[3][3];
- double dgdot[3][3][3];
- double D2alpha[3][3];
- double Kdot[3][3];
- double k2, Kij_Dij_alpha, k_kdot, k3;
- double Gammadot_term, beta_term;
+#define AXIS_VAL(axis, normal) (on_axis ? (axis) : (normal))
+
+ ELEM_TYPE K[3][3], Km[3][3], Ku[3][3], dK[3][3][3];
+ ELEM_TYPE gtu[3][3], g[3][3], gu[3][3];
+ ELEM_TYPE dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3];
+ ELEM_TYPE G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3];
+ ELEM_TYPE Ric[3][3], Ricm[3][3];
+ ELEM_TYPE dgdot[3][3][3];
+ ELEM_TYPE D2alpha[3][3];
+ ELEM_TYPE Kdot[3][3];
+ ELEM_TYPE k2, Kij_Dij_alpha, k_kdot, k3;
+ ELEM_TYPE Gammadot_term, beta_term;
- double betal[3], dbetal[3][3], d2betal[3][3][3];
+ ELEM_TYPE betal[3], dbetal[3][3], d2betal[3][3][3];
-#define LOAD_VAR(var) ( vars[RHS_VAR_ ## var][idx_src])
+#define LOAD_VAR(var) LOAD ( vars[RHS_VAR_ ## var], idx_src)
#define DX(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv ))
#define DZ(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv))
#define DXX(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv))
#define DZZ(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv))
#define DXZ(var) (diff11(&vars[RHS_VAR_ ## var][idx_src], 1, stride_z, dx_inv, dz_inv))
- const double gtxx = LOAD_VAR(GTXX);
- const double gtyy = LOAD_VAR(GTYY);
- const double gtzz = LOAD_VAR(GTZZ);
- const double gtxy = LOAD_VAR(GTXY);
- const double gtxz = LOAD_VAR(GTXZ);
- const double gtyz = LOAD_VAR(GTYZ);
+ const ELEM_TYPE gtxx = LOAD_VAR(GTXX);
+ const ELEM_TYPE gtyy = LOAD_VAR(GTYY);
+ const ELEM_TYPE gtzz = LOAD_VAR(GTZZ);
+ const ELEM_TYPE gtxy = LOAD_VAR(GTXY);
+ const ELEM_TYPE gtxz = LOAD_VAR(GTXZ);
+ const ELEM_TYPE gtyz = LOAD_VAR(GTYZ);
- const double gt[3][3] = {{ gtxx, gtxy, gtxz },
- { gtxy, gtyy, gtyz },
- { gtxz, gtyz, gtzz }};
+ const ELEM_TYPE gt[3][3] = {{ gtxx, gtxy, gtxz },
+ { gtxy, gtyy, gtyz },
+ { gtxz, gtyz, gtzz }};
- const double dx_gt11 = DX(GTXX);
- const double dx_gt22 = DX(GTYY);
- const double dx_gt33 = DX(GTZZ);
- const double dx_gt13 = DX(GTXZ);
+ const ELEM_TYPE dx_gt11 = DX(GTXX);
+ const ELEM_TYPE dx_gt22 = DX(GTYY);
+ const ELEM_TYPE dx_gt33 = DX(GTZZ);
+ const ELEM_TYPE dx_gt13 = DX(GTXZ);
- const double dz_gt11 = DZ(GTXX);
- const double dz_gt22 = DZ(GTYY);
- const double dz_gt33 = DZ(GTZZ);
- const double dz_gt13 = DZ(GTXZ);
+ const ELEM_TYPE dz_gt11 = DZ(GTXX);
+ const ELEM_TYPE dz_gt22 = DZ(GTYY);
+ const ELEM_TYPE dz_gt33 = DZ(GTZZ);
+ const ELEM_TYPE dz_gt13 = DZ(GTXZ);
- const double dgt[3][3][3] = {
+ const ELEM_TYPE dgt[3][3][3] = {
{
- { dx_gt11, 0.0, dx_gt13 },
- { 0.0, dx_gt22, 0.0 },
- { dx_gt13, 0.0, dx_gt33 },
+ { dx_gt11, ZERO, dx_gt13 },
+ { ZERO, dx_gt22, ZERO },
+ { dx_gt13, ZERO, dx_gt33 },
},
{
- { 0.0, on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0 },
- { on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0, on_axis ? dx_gt13 : gtxz / x },
- { 0.0, on_axis ? dx_gt13 : gtxz / x, 0.0 },
+ { ZERO, AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO },
+ { AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) / x), ZERO, AXIS_VAL(dx_gt13, gtxz / x) },
+ { ZERO, AXIS_VAL(dx_gt13, gtxz / x), ZERO },
},
{
- { dz_gt11, 0.0, dz_gt13 },
- { 0.0, dz_gt22, 0.0 },
- { dz_gt13, 0.0, dz_gt33 },
+ { dz_gt11, ZERO, dz_gt13 },
+ { ZERO, dz_gt22, ZERO },
+ { dz_gt13, ZERO, dz_gt33 },
},
};
- const double dxx_gt11 = DXX(GTXX);
- const double dxx_gt22 = DXX(GTYY);
- const double dxx_gt33 = DXX(GTZZ);
- const double dxx_gt13 = DXX(GTXZ);
+ const ELEM_TYPE dxx_gt11 = DXX(GTXX);
+ const ELEM_TYPE dxx_gt22 = DXX(GTYY);
+ const ELEM_TYPE dxx_gt33 = DXX(GTZZ);
+ const ELEM_TYPE dxx_gt13 = DXX(GTXZ);
- const double dxz_gt11 = DXZ(GTXX);
- const double dxz_gt22 = DXZ(GTYY);
- const double dxz_gt33 = DXZ(GTZZ);
- const double dxz_gt13 = DXZ(GTXZ);
+ const ELEM_TYPE dxz_gt11 = DXZ(GTXX);
+ const ELEM_TYPE dxz_gt22 = DXZ(GTYY);
+ const ELEM_TYPE dxz_gt33 = DXZ(GTZZ);
+ const ELEM_TYPE dxz_gt13 = DXZ(GTXZ);
- const double dzz_gt11 = DZZ(GTXX);
- const double dzz_gt22 = DZZ(GTYY);
- const double dzz_gt33 = DZZ(GTZZ);
- const double dzz_gt13 = DZZ(GTXZ);
+ const ELEM_TYPE dzz_gt11 = DZZ(GTXX);
+ const ELEM_TYPE dzz_gt22 = DZZ(GTYY);
+ const ELEM_TYPE dzz_gt33 = DZZ(GTZZ);
+ const ELEM_TYPE dzz_gt13 = DZZ(GTXZ);
- const double d2gt[3][3][3][3] = {
+ const ELEM_TYPE d2gt[3][3][3][3] = {
{
{
- { dxx_gt11, 0.0, dxx_gt13 },
- { 0.0, dxx_gt22, 0.0 },
- { dxx_gt13, 0.0, dxx_gt33 },
+ { dxx_gt11, ZERO, dxx_gt13 },
+ { ZERO, dxx_gt22, ZERO },
+ { dxx_gt13, ZERO, dxx_gt33 },
},
{
- { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 },
- { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0,
- on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) },
- { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 },
+ { ZERO, AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO },
+ { AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO,
+ AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) },
+ { ZERO, AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO },
},
{
- { dxz_gt11, 0.0, dxz_gt13 },
- { 0.0, dxz_gt22, 0.0 },
- { dxz_gt13, 0.0, dxz_gt33 },
+ { dxz_gt11, ZERO, dxz_gt13 },
+ { ZERO, dxz_gt22, ZERO },
+ { dxz_gt13, ZERO, dxz_gt33 },
},
},
{
{
- { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 },
- { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0,
- on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) },
- { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 },
+ { ZERO, AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO },
+ { AXIS_VAL(0.5 * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x)), ZERO,
+ AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) },
+ { ZERO, AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO },
},
{
- { on_axis ? dxx_gt22 : dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x), 0.0,
- on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) },
- { 0.0, on_axis ? dxx_gt11 : dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x), 0.0 },
- { on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0, on_axis ? dxx_gt33 : dx_gt33 / x },
+ { AXIS_VAL(dxx_gt22, dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x)), ZERO,
+ AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)) },
+ { ZERO, AXIS_VAL(dxx_gt11, dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x)), ZERO },
+ { AXIS_VAL(0.5 * dxx_gt13, dx_gt13 / x - gtxz / SQR(x)), ZERO,
+ AXIS_VAL(dxx_gt33, dx_gt33 / x) },
},
{
- { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 },
- { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0,
- on_axis ? dxz_gt13 : dz_gt13 / x },
- { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 },
+ { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO },
+ { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO,
+ AXIS_VAL(dxz_gt13, dz_gt13 / x) },
+ { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO },
},
},
{
{
- { dxz_gt11, 0.0, dxz_gt13 },
- { 0.0, dxz_gt22, 0.0 },
- { dxz_gt13, 0.0, dxz_gt33 },
+ { dxz_gt11, ZERO, dxz_gt13 },
+ { ZERO, dxz_gt22, ZERO },
+ { dxz_gt13, ZERO, dxz_gt33 },
},
{
- { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 },
- { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0,
- on_axis ? dxz_gt13 : dz_gt13 / x },
- { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 },
+ { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO },
+ { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) / x), ZERO,
+ AXIS_VAL(dxz_gt13, dz_gt13 / x) },
+ { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 / x), ZERO },
},
{
- { dzz_gt11, 0.0, dzz_gt13 },
- { 0.0, dzz_gt22, 0.0 },
- { dzz_gt13, 0.0, dzz_gt33 },
+ { dzz_gt11, ZERO, dzz_gt13 },
+ { ZERO, dzz_gt22, ZERO },
+ { dzz_gt13, ZERO, dzz_gt33 },
},
},
};
- const double Atxx = LOAD_VAR(ATXX);
- const double Atyy = LOAD_VAR(ATYY);
- const double Atzz = LOAD_VAR(ATZZ);
- const double Atxy = LOAD_VAR(ATXY);
- const double Atxz = LOAD_VAR(ATXZ);
- const double Atyz = LOAD_VAR(ATYZ);
+ const ELEM_TYPE Atxx = LOAD_VAR(ATXX);
+ const ELEM_TYPE Atyy = LOAD_VAR(ATYY);
+ const ELEM_TYPE Atzz = LOAD_VAR(ATZZ);
+ const ELEM_TYPE Atxy = LOAD_VAR(ATXY);
+ const ELEM_TYPE Atxz = LOAD_VAR(ATXZ);
+ const ELEM_TYPE Atyz = LOAD_VAR(ATYZ);
- const double At[3][3] = {
+ const ELEM_TYPE At[3][3] = {
{ Atxx, Atxy, Atxz },
{ Atxy, Atyy, Atyz },
{ Atxz, Atyz, Atzz }};
- const double dx_At11 = DX(ATXX);
- const double dx_At22 = DX(ATYY);
- const double dx_At33 = DX(ATZZ);
- const double dx_At13 = DX(ATXZ);
+ const ELEM_TYPE dx_At11 = DX(ATXX);
+ const ELEM_TYPE dx_At22 = DX(ATYY);
+ const ELEM_TYPE dx_At33 = DX(ATZZ);
+ const ELEM_TYPE dx_At13 = DX(ATXZ);
- const double dz_At11 = DZ(ATXX);
- const double dz_At22 = DZ(ATYY);
- const double dz_At33 = DZ(ATZZ);
- const double dz_At13 = DZ(ATXZ);
+ const ELEM_TYPE dz_At11 = DZ(ATXX);
+ const ELEM_TYPE dz_At22 = DZ(ATYY);
+ const ELEM_TYPE dz_At33 = DZ(ATZZ);
+ const ELEM_TYPE dz_At13 = DZ(ATXZ);
- const double dAt[3][3][3] = {
+ const ELEM_TYPE dAt[3][3][3] = {
{
- { dx_At11, 0.0, dx_At13 },
- { 0.0, dx_At22, 0.0 },
- { dx_At13, 0.0, dx_At33 },
+ { dx_At11, ZERO, dx_At13 },
+ { ZERO, dx_At22, ZERO },
+ { dx_At13, ZERO, dx_At33 },
},
{
- { 0.0, on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0 },
- { on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0, on_axis ? dx_At13 : Atxz / x },
- { 0.0, on_axis ? dx_At13 : Atxz / x, 0.0 },
+ { ZERO, AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO },
+ { AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) / x), ZERO, AXIS_VAL(dx_At13, Atxz / x) },
+ { ZERO, AXIS_VAL(dx_At13, Atxz / x), ZERO },
},
{
- { dz_At11, 0.0, dz_At13 },
- { 0.0, dz_At22, 0.0 },
- { dz_At13, 0.0, dz_At33 },
+ { dz_At11, ZERO, dz_At13 },
+ { ZERO, dz_At22, ZERO },
+ { dz_At13, ZERO, dz_At33 },
},
};
- const double phi = LOAD_VAR(PHI);
- const double dphi[3] = { DX(PHI), 0.0, DZ(PHI) };
+ const ELEM_TYPE phi = LOAD_VAR(PHI);
+ const ELEM_TYPE dphi[3] = { DX(PHI), ZERO, DZ(PHI) };
- const double dxx_phi = DXX(PHI);
- const double dzz_phi = DZZ(PHI);
- const double dxz_phi = DXZ(PHI);
+ const ELEM_TYPE dxx_phi = DXX(PHI);
+ const ELEM_TYPE dzz_phi = DZZ(PHI);
+ const ELEM_TYPE dxz_phi = DXZ(PHI);
- const double d2phi[3][3] = {
- { dxx_phi, 0.0, dxz_phi },
- { 0.0, on_axis ? dxx_phi : dphi[0] / x, 0.0 },
- { dxz_phi, 0.0, dzz_phi },
+ const ELEM_TYPE d2phi[3][3] = {
+ { dxx_phi, ZERO, dxz_phi },
+ { ZERO, AXIS_VAL(dxx_phi, dphi[0] / x), ZERO },
+ { dxz_phi, ZERO, dzz_phi },
};
- const double trK = LOAD_VAR(TRK);
- const double dtrK[3] = { DX(TRK), 0.0, DZ(TRK) };
+ const ELEM_TYPE trK = LOAD_VAR(TRK);
+ const ELEM_TYPE dtrK[3] = { DX(TRK), ZERO, DZ(TRK) };
- const double Xtx = LOAD_VAR(XTX);
- const double Xtz = LOAD_VAR(XTZ);
+ const ELEM_TYPE Xtx = LOAD_VAR(XTX);
+ const ELEM_TYPE Xtz = LOAD_VAR(XTZ);
- const double alpha = LOAD_VAR(ALPHA);
- const double dx_alpha = DX(ALPHA);
- const double dz_alpha = DZ(ALPHA);
+ const ELEM_TYPE alpha = LOAD_VAR(ALPHA);
+ const ELEM_TYPE dx_alpha = DX(ALPHA);
+ const ELEM_TYPE dz_alpha = DZ(ALPHA);
- const double dalpha[3] = { dx_alpha, 0.0, dz_alpha };
+ const ELEM_TYPE dalpha[3] = { dx_alpha, ZERO, dz_alpha };
- const double dxx_alpha = DXX(ALPHA);
- const double dzz_alpha = DZZ(ALPHA);
- const double dxz_alpha = DXZ(ALPHA);
+ const ELEM_TYPE dxx_alpha = DXX(ALPHA);
+ const ELEM_TYPE dzz_alpha = DZZ(ALPHA);
+ const ELEM_TYPE dxz_alpha = DXZ(ALPHA);
- const double d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha },
- { 0, on_axis ? dxx_alpha : dx_alpha / x, 0 },
- { dxz_alpha, 0, dzz_alpha }};
+ const ELEM_TYPE d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha },
+ { 0, AXIS_VAL(dxx_alpha, dx_alpha / x), 0 },
+ { dxz_alpha, 0, dzz_alpha }};
- const double betax = LOAD_VAR(BETAX);
- const double betaz = LOAD_VAR(BETAX);
+ const ELEM_TYPE betax = LOAD_VAR(BETAX);
+ const ELEM_TYPE betaz = LOAD_VAR(BETAX);
- const double beta[3] = { betax, 0.0, betaz };
+ const ELEM_TYPE beta[3] = { betax, ZERO, betaz };
- const double dx_betax = DX(BETAX);
- const double dz_betax = DZ(BETAX);
+ const ELEM_TYPE dx_betax = DX(BETAX);
+ const ELEM_TYPE dz_betax = DZ(BETAX);
- const double dx_betaz = DX(BETAZ);
- const double dz_betaz = DZ(BETAZ);
+ const ELEM_TYPE dx_betaz = DX(BETAZ);
+ const ELEM_TYPE dz_betaz = DZ(BETAZ);
- const double dbeta[3][3] = {{ dx_betax, 0.0, dx_betaz },
- { 0.0, on_axis ? dx_betax : betax / x, 0.0 },
- { dz_betax, 0.0, dz_betaz }};
+ const ELEM_TYPE dbeta[3][3] = {{ dx_betax, ZERO, dx_betaz },
+ { ZERO, AXIS_VAL(dx_betax, betax / x), ZERO },
+ { dz_betax, ZERO, dz_betaz }};
- const double dxx_betax = DXX(BETAX);
- const double dxz_betax = DXZ(BETAX);
- const double dzz_betax = DZZ(BETAX);
+ const ELEM_TYPE dxx_betax = DXX(BETAX);
+ const ELEM_TYPE dxz_betax = DXZ(BETAX);
+ const ELEM_TYPE dzz_betax = DZZ(BETAX);
- const double dxx_betaz = DXX(BETAZ);
- const double dxz_betaz = DXZ(BETAZ);
- const double dzz_betaz = DZZ(BETAZ);
+ const ELEM_TYPE dxx_betaz = DXX(BETAZ);
+ const ELEM_TYPE dxz_betaz = DXZ(BETAZ);
+ const ELEM_TYPE dzz_betaz = DZZ(BETAZ);
#undef LOAD_VAR
#undef DX
@@ -357,25 +382,25 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
#undef DZZ
#undef DXZ
- const double d2beta[3][3][3] = {
+ const ELEM_TYPE d2beta[3][3][3] = {
{
- { dxx_betax, 0.0, dxx_betaz },
- { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 },
- { dxz_betax, 0.0, dxz_betaz },
+ { dxx_betax, ZERO, dxx_betaz },
+ { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO },
+ { dxz_betax, ZERO, dxz_betaz },
},
{
- { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 },
- { on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0, on_axis ? dxx_betaz : dx_betaz / x },
- { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 },
+ { ZERO, AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO },
+ { AXIS_VAL(0.5 * dxx_betax, dx_betax / x - betax / SQR(x)), ZERO, AXIS_VAL(dxx_betaz, dx_betaz / x) },
+ { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO },
},
{
- { dxz_betax, 0.0, dxz_betaz },
- { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 },
- { dzz_betax, 0.0, dzz_betaz },
+ { dxz_betax, ZERO, dxz_betaz },
+ { ZERO, AXIS_VAL(dxz_betax, dz_betax / x), ZERO },
+ { dzz_betax, ZERO, dzz_betaz },
},
};
- const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz);
+ const ELEM_TYPE det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz);
// \tilde{γ}^{ij}
gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det;
@@ -397,7 +422,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
// β_j
for (int j = 0; j < 3; j++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int k = 0; k < 3; k++)
val += g[j][k] * beta[k];
betal[j] = val;
@@ -429,7 +454,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int m = 0; m < 3; m++)
for (int n = 0; n < 3; n++)
val += -gu[k][m] * gu[l][n] * dg[j][m][n];
@@ -440,9 +465,9 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int m = 0; m < 3; m++)
- val += 0.5 * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]);
+ val += HALF * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]);
G[j][k][l] = val;
}
@@ -451,17 +476,17 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++)
for (int m = 0; m < 3; m++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int n = 0; n < 3; n++) {
val += dgu[j][k][n] * (dg [l][m][n] + dg [m][l][n] - dg [n][l][m]) +
gu [k][n] * (d2g[j][l][m][n] + d2g[j][m][l][n] - d2g[j][n][l][m]);
}
- dG[j][k][l][m] = 0.5 * val;
+ dG[j][k][l][m] = HALF * val;
}
// Γ^j = γ^{kl} Γ_{kl}^j
for (int j = 0; j < 3; j++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++)
val += gu[k][l] * G[j][k][l];
@@ -471,7 +496,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
// ∂_j β_k
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int l = 0; l < 3; l++)
val += g[k][l] * dbeta[j][l] + dg[j][k][l] * beta[l];
dbetal[j][k] = val;
@@ -481,7 +506,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int m = 0; m < 3; m++)
val += d2g[j][k][l][m] * beta[m] + dg[k][l][m] * dbeta[j][m] + dg[j][l][m] * dbeta[k][m] +
g[l][m] * d2beta[j][k][m];
@@ -495,7 +520,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int l = 0; l < 3; l++)
val += gu[j][l] * K[l][k];
Km[j][k] = val;
@@ -504,7 +529,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
// K^{ij}
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int l = 0; l < 3; l++)
val += gu[j][l] * Km[k][l];
Ku[j][k] = val;
@@ -514,16 +539,16 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
- dgdot[j][k][l] = -2.0 * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) +
+ dgdot[j][k][l] = -TWO * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) +
d2betal[j][k][l] + d2betal[j][l][k];
for (int m = 0; m < 3; m++)
- dgdot[j][k][l] += -2.0 * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]);
+ dgdot[j][k][l] += -TWO * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]);
}
// \dot{γ^{jk}}
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- gudot[j][k] = 2.0 * alpha * Ku[j][k];
+ gudot[j][k] = TWO * alpha * Ku[j][k];
for (int l = 0; l < 3; l++) {
gudot[j][k] += -gu[j][l] * dbeta[l][k] - gu[k][l] * dbeta[l][j];
@@ -538,17 +563,17 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int m = 0; m < 3; m++)
val += gudot[j][m] * (dg [k][l][m] + dg [l][k][m] - dg [m][k][l]) +
gu [j][m] * (dgdot[k][l][m] + dgdot[l][k][m] - dgdot[m][k][l]);
- Gdot[j][k][l] = 0.5 * val;
+ Gdot[j][k][l] = HALF * val;
}
- Gammadot_term = 0.0;
+ Gammadot_term = ZERO;
for (int j = 0; j < 3; j++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
val += gu[k][l] * Gdot[j][k][l];
@@ -559,30 +584,30 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int l = 0; l < 3; l++)
val += G[l][j][k] * dalpha[l];
D2alpha[j][k] = d2alpha[j][k] - val;
}
- Kij_Dij_alpha = 0.0;
+ Kij_Dij_alpha = ZERO;
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
Kij_Dij_alpha += Ku[j][k] * D2alpha[j][k];
}
- k3 = 0.0;
+ k3 = ZERO;
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int l = 0; l < 3; l++)
val += Km[k][l] * Km[l][j];
k3 += val * Km[j][k];
}
// K_{ij} K^{ij}
- k2 = 0.0;
+ k2 = ZERO;
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
k2 += Km[j][k] * Km[k][j];
@@ -590,7 +615,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
// Ric_{jk}
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int m = 0; m < 3; m++)
val += dG[m][m][j][k] - dG[k][m][j][m];
for (int m = 0; m < 3; m++)
@@ -602,7 +627,7 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
// Ric^j_k
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int l = 0; l < 3; l++)
val += gu[j][l] * Ric[l][k];
Ricm[j][k] = val;
@@ -610,14 +635,14 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
- double val = 0.0;
+ ELEM_TYPE val = ZERO;
for (int l = 0; l < 3; l++)
val += K[j][l] * Km[l][k];
- Kdot[j][k] = -D2alpha[j][k] + alpha * (Ric[j][k] + trK * K[j][k] - 2 * val);
+ Kdot[j][k] = -D2alpha[j][k] + alpha * (Ric[j][k] + trK * K[j][k] - TWO * val);
}
- k_kdot = 0.0;
+ k_kdot = ZERO;
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) {
//k_kdot += kdot[j][k] * Ku[j][k];
@@ -625,11 +650,11 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
}
- beta_term = 0.0;
+ beta_term = ZERO;
for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++)
for (int l = 0; l < 3; l++) {
- double D2beta = 0.0;
+ ELEM_TYPE D2beta = ZERO;
D2beta += d2beta[j][k][l];
@@ -646,15 +671,15 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
for (int k = 0; k < 3; k++)
beta_term += -beta[k] * Ricm[j][k] * dalpha[j];
- solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_dc] = gu[0][0] + (on_axis ? gu[1][1] : 0.0);
- solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_dc] = gu[2][2];
- solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_dc] = 2.0 * gu[0][2];
- solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_dc] = -X[0] + (on_axis ? 0.0 : gu[1][1] / x);
- solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_dc] = -X[2];
- solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_dc] = -k2;
+ STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data, idx_dc, gu[0][0] + AXIS_VAL(gu[1][1], ZERO));
+ STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data, idx_dc, gu[2][2]);
+ STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data, idx_dc, 2.0 * gu[0][2]);
+ STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data, idx_dc, -X[0] + AXIS_VAL(ZERO, gu[1][1] / x));
+ STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data, idx_dc, -X[2]);
+ STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data, idx_dc, -k2);
- solver->rhs[idx_rhs] = -2.0 * alpha * Kij_Dij_alpha + Gammadot_term +
- 2.0 * alpha * (k_kdot + 2.0 * alpha * k3) + beta_term;
+ STORE(solver->rhs, idx_rhs, -TWO * alpha * Kij_Dij_alpha + Gammadot_term +
+ TWO * alpha * (k_kdot + TWO * alpha * k3) + beta_term);
if (on_axis) {
solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1];
@@ -662,3 +687,23 @@ static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext
}
}
}
+
+#undef LOAD
+#undef STORE
+
+#undef ZERO
+#undef ONE
+#undef TWO
+#undef SIX
+#undef EIGHT
+#undef TWELVE
+#undef C16
+#undef C30
+
+#undef HALF
+#undef THIRD
+#undef C1_144
+
+#undef ELEM_SPLAT
+#undef ELEM_SIZE
+#undef ELEM_TYPE