#ifdef ELEM_SCALAR # define TYPE scalar # define ELEM_TYPE double # define ELEM_SPLAT(x) (x) # define LOAD(arr, idx) (arr[idx]) # define STORE(arr, idx, val) arr[idx] = val #elif defined ELEM_AVX512 # define TYPE avx512 # define ELEM_TYPE __m512d # define ELEM_SPLAT(x) _mm512_set1_pd(x) #define LOAD(arr, idx) _mm512_loadu_pd((arr) + (idx)) #define STORE(arr, idx, val) _mm512_storeu_pd((arr) + (idx), val) #elif defined ELEM_AVX2 # define TYPE avx2 # define ELEM_TYPE __m256d # define ELEM_SPLAT(x) _mm256_set1_pd(x) #define LOAD(arr, idx) _mm256_loadu_pd((arr) + (idx)) #define STORE(arr, idx, val) _mm256_storeu_pd((arr) + (idx), val) #else # error "Unknown element type" #endif #define ELEM_SIZE sizeof(ELEM_TYPE) / sizeof(double) // while gcc supports multiplying vectors by scalars, icc does not, // which forces us to do this #define ZERO ELEM_SPLAT(0.0) #define ONE ELEM_SPLAT(1.0) #define TWO ELEM_SPLAT(2.0) #define SIX ELEM_SPLAT(6.0) #define EIGHT ELEM_SPLAT(8.0) #define C16 ELEM_SPLAT(16.0) #define C30 ELEM_SPLAT(30.0) #define HALF ELEM_SPLAT(0.5) #define THIRD ELEM_SPLAT(1.0 / 3.0) #define C1_12 ELEM_SPLAT(1. / 12.0) #define C1_144 ELEM_SPLAT(1.0 / 144.0) #define JOIN2(a, b) a ## _ ## b #define FUNC2(name, type) JOIN2(name, type) #define FUNC(name) FUNC2(name, TYPE) static inline ELEM_TYPE FUNC(fd1_4)(const double *arr, const ptrdiff_t stride, const ELEM_TYPE h_inv) { const ELEM_TYPE ap2 = LOAD(arr, 2 * stride); const ELEM_TYPE ap1 = LOAD(arr, 1 * stride); const ELEM_TYPE am1 = LOAD(arr, -1 * stride); const ELEM_TYPE am2 = LOAD(arr, -2 * stride); return (-ap2 + EIGHT * ap1 - EIGHT * am1 + am2) * h_inv * C1_12; } static inline ELEM_TYPE FUNC(fd2_4)(const double *arr, const ptrdiff_t stride, const ELEM_TYPE h_inv) { const ELEM_TYPE ap2 = LOAD(arr, 2 * stride); const ELEM_TYPE ap1 = LOAD(arr, 1 * stride); const ELEM_TYPE a0 = LOAD(arr, 0 * stride); const ELEM_TYPE am1 = LOAD(arr, -1 * stride); const ELEM_TYPE am2 = LOAD(arr, -2 * stride); return (-ap2 + C16 * ap1 - C30 * a0 + C16 * am1 - am2) * SQR(h_inv) * C1_12; } static inline ELEM_TYPE FUNC(fd11_4)(const double *arr, ptrdiff_t stride_x, ptrdiff_t stride_z, ELEM_TYPE dx_inv, ELEM_TYPE dz_inv) { const ELEM_TYPE ap2p2 = LOAD(arr, 2 * stride_x + 2 * stride_z); const ELEM_TYPE ap2p1 = LOAD(arr, 2 * stride_x + 1 * stride_z); const ELEM_TYPE ap2m1 = LOAD(arr, 2 * stride_x - 1 * stride_z); const ELEM_TYPE ap2m2 = LOAD(arr, 2 * stride_x - 2 * stride_z); const ELEM_TYPE ap1p2 = LOAD(arr, 1 * stride_x + 2 * stride_z); const ELEM_TYPE ap1p1 = LOAD(arr, 1 * stride_x + 1 * stride_z); const ELEM_TYPE ap1m1 = LOAD(arr, 1 * stride_x - 1 * stride_z); const ELEM_TYPE ap1m2 = LOAD(arr, 1 * stride_x - 2 * stride_z); const ELEM_TYPE am1p2 = LOAD(arr, -1 * stride_x + 2 * stride_z); const ELEM_TYPE am1p1 = LOAD(arr, -1 * stride_x + 1 * stride_z); const ELEM_TYPE am1m1 = LOAD(arr, -1 * stride_x - 1 * stride_z); const ELEM_TYPE am1m2 = LOAD(arr, -1 * stride_x - 2 * stride_z); const ELEM_TYPE am2p2 = LOAD(arr, -2 * stride_x + 2 * stride_z); const ELEM_TYPE am2p1 = LOAD(arr, -2 * stride_x + 1 * stride_z); const ELEM_TYPE am2m1 = LOAD(arr, -2 * stride_x - 1 * stride_z); const ELEM_TYPE am2m2 = LOAD(arr, -2 * stride_x - 2 * stride_z); return (-ONE * (-ap2p2 + EIGHT * ap1p2 - EIGHT * am1p2 + am2p2) +EIGHT * (-ap2p1 + EIGHT * ap1p1 - EIGHT * am1p1 + am2p1) -EIGHT * (-ap2m1 + EIGHT * ap1m1 - EIGHT * am1m1 + am2m1) +ONE * (-ap2m2 + EIGHT * ap1m2 - EIGHT * am1m2 + am2m2)) * dx_inv * dz_inv * C1_144; } #if 0 #define FD8(arr, stride, h_inv) \ ((-3.0 * arr[4 * stride] + 32.0 * arr[3 * stride] - 168.0 * arr[2 * stride] + 672.0 * arr[1 * stride] \ - 672.0 * arr[-1 * stride] + 168.0 * arr[-2 * stride] - 32.0 * arr[-3 * stride] + 3.0 * arr[-4 * stride]) * h_inv / 840.0) #define FD28(arr, stride, h_inv) \ ((-9.0 * arr[ 4 * stride] + 128.0 * arr[ 3 * stride] - 1008.0 * arr[ 2 * stride] + 8064.0 * arr[ 1 * stride] \ -9.0 * arr[-4 * stride] + 128.0 * arr[-3 * stride] - 1008.0 * arr[-2 * stride] + 8064.0 * arr[-1 * stride] \ - 14350.0 * arr[0]) * SQR(h_inv) / 5040.0) static double diff8_dx(const double *arr, ptrdiff_t stride, double h_inv) { return FD8(arr, 1, h_inv); } static double diff8_dz(const double *arr, ptrdiff_t stride, double h_inv) { return FD8(arr, stride, h_inv); } static double diff8_dxx(const double *arr, ptrdiff_t stride, double h_inv) { return FD28(arr, 1, h_inv); } static double diff8_dzz(const double *arr, ptrdiff_t stride, double h_inv) { return FD28(arr, stride, h_inv); } static double diff8_dxz(const double *arr, ptrdiff_t stride, double dx_inv, double dz_inv) { static const double fd_coeffs[] = { 3.0, -32.0, 168.0, -672.0, 0.0, 672.0, -168.0, 32.0, -3.0 }; static const int stencil = ARRAY_ELEMS(fd_coeffs) / 2; double val = 0.0; for (int j = -stencil; j <= stencil; j++) { double tmp = 0.0; for (int i = -stencil; i <= stencil; i++) tmp += fd_coeffs[i + stencil] * arr[j * stride + i]; val += fd_coeffs[j + stencil] * tmp; } return val * dx_inv * dz_inv / SQR(840.0); } #define diff1 fd1_8 #define diff2 fd2_8 #define diff11 fd11_8 #else #define diff1 FUNC(fd1_4) #define diff2 FUNC(fd2_4) #define diff11 FUNC(fd11_4) #endif #define diff_dx(arr, stride, dx_inv, dz_inv) (diff1 (arr, 1, dx_inv)) #define diff_dz(arr, stride, dx_inv, dz_inv) (diff1 (arr, stride, dz_inv)) #define diff_dxx(arr, stride, dx_inv, dz_inv) (diff2 (arr, 1, dx_inv)) #define diff_dzz(arr, stride, dx_inv, dz_inv) (diff2 (arr, stride, dz_inv)) #define diff_dxz(arr, stride, dx_inv, dz_inv) (diff11(arr, 1, stride, dx_inv, dz_inv)) static void FUNC(fill_eq_coeffs_line)(const cGH *gh, const CoordPatch *cp, MG2DContext *solver, const double * const vars[], const int idx_x_start, const int idx_x_end, const int idx_z, const ptrdiff_t stride_z, const double dx_inv_scal, const double dz_inv_scal) { const ELEM_TYPE dx_inv = ELEM_SPLAT(dx_inv_scal); const ELEM_TYPE dz_inv = ELEM_SPLAT(dz_inv_scal); for (int idx_x = idx_x_start; idx_x < idx_x_end; idx_x += ELEM_SIZE) { const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); const int idx_dc = idx_z * solver->diff_coeffs[0]->stride + idx_x; const int idx_rhs = idx_z * solver->rhs_stride + idx_x; ELEM_TYPE K[3][3], Km[3][3], Ku[3][3], dK[3][3][3]; ELEM_TYPE gtu[3][3], g[3][3], gu[3][3]; ELEM_TYPE dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3]; ELEM_TYPE G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3]; ELEM_TYPE Ric[3][3], Ricm[3][3]; ELEM_TYPE dgdot[3][3][3]; ELEM_TYPE D2alpha[3][3]; ELEM_TYPE Kdot[3][3]; ELEM_TYPE k2, Kij_Dij_alpha, k_kdot, k3; ELEM_TYPE Gammadot_term, beta_term; ELEM_TYPE betal[3], dbetal[3][3], d2betal[3][3][3]; #define LOAD_VAR(var) LOAD ( vars[RHS_VAR_ ## var], idx_src) #define DX(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv )) #define DZ(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) #define DXX(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv)) #define DZZ(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) #define DXZ(var) (diff11(&vars[RHS_VAR_ ## var][idx_src], 1, stride_z, dx_inv, dz_inv)) const ELEM_TYPE x = LOAD_VAR(X); const ELEM_TYPE xinv = ONE / x; // special treatment for the axis is only implemented in the scalar version #ifdef ELEM_SCALAR const int on_axis = fabs(x) < 1e-13; # define AXIS_VAL(axis, normal) (on_axis ? (axis) : (normal)) #else const int on_axis = 0; # define AXIS_VAL(axis, normal) (normal) #endif const ELEM_TYPE gtxx = LOAD_VAR(GTXX); const ELEM_TYPE gtyy = LOAD_VAR(GTYY); const ELEM_TYPE gtzz = LOAD_VAR(GTZZ); const ELEM_TYPE gtxy = LOAD_VAR(GTXY); const ELEM_TYPE gtxz = LOAD_VAR(GTXZ); const ELEM_TYPE gtyz = LOAD_VAR(GTYZ); const ELEM_TYPE gt[3][3] = {{ gtxx, gtxy, gtxz }, { gtxy, gtyy, gtyz }, { gtxz, gtyz, gtzz }}; const ELEM_TYPE dx_gt11 = DX(GTXX); const ELEM_TYPE dx_gt22 = DX(GTYY); const ELEM_TYPE dx_gt33 = DX(GTZZ); const ELEM_TYPE dx_gt13 = DX(GTXZ); const ELEM_TYPE dz_gt11 = DZ(GTXX); const ELEM_TYPE dz_gt22 = DZ(GTYY); const ELEM_TYPE dz_gt33 = DZ(GTZZ); const ELEM_TYPE dz_gt13 = DZ(GTXZ); const ELEM_TYPE dgt[3][3][3] = { { { dx_gt11, ZERO, dx_gt13 }, { ZERO, dx_gt22, ZERO }, { dx_gt13, ZERO, dx_gt33 }, }, { { ZERO, AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) * xinv), ZERO }, { AXIS_VAL(dx_gt11 - dx_gt22, (gtxx - gtyy) * xinv), ZERO, AXIS_VAL(dx_gt13, gtxz * xinv) }, { ZERO, AXIS_VAL(dx_gt13, gtxz * xinv), ZERO }, }, { { dz_gt11, ZERO, dz_gt13 }, { ZERO, dz_gt22, ZERO }, { dz_gt13, ZERO, dz_gt33 }, }, }; const ELEM_TYPE dxx_gt11 = DXX(GTXX); const ELEM_TYPE dxx_gt22 = DXX(GTYY); const ELEM_TYPE dxx_gt33 = DXX(GTZZ); const ELEM_TYPE dxx_gt13 = DXX(GTXZ); const ELEM_TYPE dxz_gt11 = DXZ(GTXX); const ELEM_TYPE dxz_gt22 = DXZ(GTYY); const ELEM_TYPE dxz_gt33 = DXZ(GTZZ); const ELEM_TYPE dxz_gt13 = DXZ(GTXZ); const ELEM_TYPE dzz_gt11 = DZZ(GTXX); const ELEM_TYPE dzz_gt22 = DZZ(GTYY); const ELEM_TYPE dzz_gt33 = DZZ(GTZZ); const ELEM_TYPE dzz_gt13 = DZZ(GTXZ); const ELEM_TYPE d2gt[3][3][3][3] = { { { { dxx_gt11, ZERO, dxx_gt13 }, { ZERO, dxx_gt22, ZERO }, { dxx_gt13, ZERO, dxx_gt33 }, }, { { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO }, { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) }, { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO }, }, { { dxz_gt11, ZERO, dxz_gt13 }, { ZERO, dxz_gt22, ZERO }, { dxz_gt13, ZERO, dxz_gt33 }, }, }, { { { ZERO, AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO }, { AXIS_VAL(HALF * (dxx_gt11 - dxx_gt22), (dx_gt11 - dx_gt22) * xinv - (gtxx - gtyy) * SQR(xinv)), ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) }, { ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO }, }, { { AXIS_VAL(dxx_gt22, dx_gt11 * xinv - TWO * (gtxx - gtyy) * SQR(xinv)), ZERO, AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)) }, { ZERO, AXIS_VAL(dxx_gt11, dx_gt22 * xinv + TWO * (gtxx - gtyy) * SQR(xinv)), ZERO }, { AXIS_VAL(HALF * dxx_gt13, dx_gt13 * xinv - gtxz * SQR(xinv)), ZERO, AXIS_VAL(dxx_gt33, dx_gt33 * xinv) }, }, { { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO }, { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv) }, { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv), ZERO }, }, }, { { { dxz_gt11, ZERO, dxz_gt13 }, { ZERO, dxz_gt22, ZERO }, { dxz_gt13, ZERO, dxz_gt33 }, }, { { ZERO, AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO }, { AXIS_VAL(dxz_gt11 - dxz_gt22, (dz_gt11 - dz_gt22) * xinv), ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv) }, { ZERO, AXIS_VAL(dxz_gt13, dz_gt13 * xinv), ZERO }, }, { { dzz_gt11, ZERO, dzz_gt13 }, { ZERO, dzz_gt22, ZERO }, { dzz_gt13, ZERO, dzz_gt33 }, }, }, }; const ELEM_TYPE Atxx = LOAD_VAR(ATXX); const ELEM_TYPE Atyy = LOAD_VAR(ATYY); const ELEM_TYPE Atzz = LOAD_VAR(ATZZ); const ELEM_TYPE Atxy = LOAD_VAR(ATXY); const ELEM_TYPE Atxz = LOAD_VAR(ATXZ); const ELEM_TYPE Atyz = LOAD_VAR(ATYZ); const ELEM_TYPE At[3][3] = { { Atxx, Atxy, Atxz }, { Atxy, Atyy, Atyz }, { Atxz, Atyz, Atzz }}; const ELEM_TYPE dx_At11 = DX(ATXX); const ELEM_TYPE dx_At22 = DX(ATYY); const ELEM_TYPE dx_At33 = DX(ATZZ); const ELEM_TYPE dx_At13 = DX(ATXZ); const ELEM_TYPE dz_At11 = DZ(ATXX); const ELEM_TYPE dz_At22 = DZ(ATYY); const ELEM_TYPE dz_At33 = DZ(ATZZ); const ELEM_TYPE dz_At13 = DZ(ATXZ); const ELEM_TYPE dAt[3][3][3] = { { { dx_At11, ZERO, dx_At13 }, { ZERO, dx_At22, ZERO }, { dx_At13, ZERO, dx_At33 }, }, { { ZERO, AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) * xinv), ZERO }, { AXIS_VAL(dx_At11 - dx_At22, (Atxx - Atyy) * xinv), ZERO, AXIS_VAL(dx_At13, Atxz * xinv) }, { ZERO, AXIS_VAL(dx_At13, Atxz * xinv), ZERO }, }, { { dz_At11, ZERO, dz_At13 }, { ZERO, dz_At22, ZERO }, { dz_At13, ZERO, dz_At33 }, }, }; const ELEM_TYPE phi = LOAD_VAR(PHI); const ELEM_TYPE phi_inv = ONE / phi; const ELEM_TYPE phi2_inv = SQR(phi_inv); const ELEM_TYPE phi3_inv = phi2_inv * phi_inv; const ELEM_TYPE dphi[3] = { DX(PHI), ZERO, DZ(PHI) }; const ELEM_TYPE dxx_phi = DXX(PHI); const ELEM_TYPE dzz_phi = DZZ(PHI); const ELEM_TYPE dxz_phi = DXZ(PHI); const ELEM_TYPE d2phi[3][3] = { { dxx_phi, ZERO, dxz_phi }, { ZERO, AXIS_VAL(dxx_phi, dphi[0] * xinv), ZERO }, { dxz_phi, ZERO, dzz_phi }, }; const ELEM_TYPE trK = LOAD_VAR(TRK); const ELEM_TYPE dtrK[3] = { DX(TRK), ZERO, DZ(TRK) }; const ELEM_TYPE Xtx = LOAD_VAR(XTX); const ELEM_TYPE Xtz = LOAD_VAR(XTZ); const ELEM_TYPE alpha = LOAD_VAR(ALPHA); const ELEM_TYPE dx_alpha = DX(ALPHA); const ELEM_TYPE dz_alpha = DZ(ALPHA); const ELEM_TYPE dalpha[3] = { dx_alpha, ZERO, dz_alpha }; const ELEM_TYPE dxx_alpha = DXX(ALPHA); const ELEM_TYPE dzz_alpha = DZZ(ALPHA); const ELEM_TYPE dxz_alpha = DXZ(ALPHA); const ELEM_TYPE d2alpha[3][3] = {{ dxx_alpha, ZERO, dxz_alpha }, { ZERO, AXIS_VAL(dxx_alpha, dx_alpha / x), ZERO }, { dxz_alpha, ZERO, dzz_alpha }}; const ELEM_TYPE betax = LOAD_VAR(BETAX); const ELEM_TYPE betaz = LOAD_VAR(BETAX); const ELEM_TYPE beta[3] = { betax, ZERO, betaz }; const ELEM_TYPE dx_betax = DX(BETAX); const ELEM_TYPE dz_betax = DZ(BETAX); const ELEM_TYPE dx_betaz = DX(BETAZ); const ELEM_TYPE dz_betaz = DZ(BETAZ); const ELEM_TYPE dbeta[3][3] = {{ dx_betax, ZERO, dx_betaz }, { ZERO, AXIS_VAL(dx_betax, betax * xinv), ZERO }, { dz_betax, ZERO, dz_betaz }}; const ELEM_TYPE dxx_betax = DXX(BETAX); const ELEM_TYPE dxz_betax = DXZ(BETAX); const ELEM_TYPE dzz_betax = DZZ(BETAX); const ELEM_TYPE dxx_betaz = DXX(BETAZ); const ELEM_TYPE dxz_betaz = DXZ(BETAZ); const ELEM_TYPE dzz_betaz = DZZ(BETAZ); #undef LOAD_VAR #undef DX #undef DZ #undef DXX #undef DZZ #undef DXZ const ELEM_TYPE d2beta[3][3][3] = { { { dxx_betax, ZERO, dxx_betaz }, { ZERO, AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO }, { dxz_betax, ZERO, dxz_betaz }, }, { { ZERO, AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO }, { AXIS_VAL(HALF * dxx_betax, dx_betax * xinv - betax * SQR(xinv)), ZERO, AXIS_VAL(dxx_betaz, dx_betaz * xinv) }, { ZERO, AXIS_VAL(dxz_betax, dz_betax * xinv), ZERO }, }, { { dxz_betax, ZERO, dxz_betaz }, { ZERO, AXIS_VAL(dxz_betax, dz_betax * xinv), ZERO }, { dzz_betax, ZERO, dzz_betaz }, }, }; const ELEM_TYPE det = gtxx * gtyy * gtzz + TWO * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); const ELEM_TYPE det_inv = ONE / det; // \tilde{γ}^{ij} gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) * det_inv; gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) * det_inv; gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) * det_inv; gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) * det_inv; gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) * det_inv; gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) * det_inv; gtu[1][0] = gtu[0][1]; gtu[2][0] = gtu[0][2]; gtu[2][1] = gtu[1][2]; // γ_{jk}/^{jk} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { gu[j][k] = SQR(phi) * gtu[j][k]; g[j][k] = gt[j][k] * SQR(phi_inv); } // β_j for (int j = 0; j < 3; j++) { ELEM_TYPE val = ZERO; for (int k = 0; k < 3; k++) val += g[j][k] * beta[k]; betal[j] = val; } // ∂_j γ_{kl} // ∂_j K_{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { dg[j][k][l] = -TWO * dphi[j] * gt[k][l] * phi3_inv + dgt[j][k][l] * phi2_inv; dK[j][k][l] = -TWO * dphi[j] * At[k][l] * phi3_inv + dAt[j][k][l] * phi2_inv + THIRD * (dtrK[j] * g[k][l] + trK * dg[j][k][l]); } // ∂_{jk} g_{lm} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) for (int m = 0; m < 3; m++) { d2g[j][k][l][m] = SIX * gt [l][m] * dphi[j] * dphi[k] * SQR(phi2_inv) - TWO * gt [l][m] * d2phi[j][k] * phi3_inv - TWO * dgt [j][l][m] * dphi[k] * phi3_inv - TWO * dgt [k][l][m] * dphi[j] * phi3_inv + d2gt[j][k][l][m] * phi2_inv; } // ∂_j γ^{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) for (int n = 0; n < 3; n++) val += -gu[k][m] * gu[l][n] * dg[j][m][n]; dgu[j][k][l] = val; } // Γ^j_{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) val += HALF * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]); G[j][k][l] = val; } // ∂_j Γ^k_{lm} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) for (int m = 0; m < 3; m++) { ELEM_TYPE val = ZERO; for (int n = 0; n < 3; n++) { val += dgu[j][k][n] * (dg [l][m][n] + dg [m][l][n] - dg [n][l][m]) + gu [k][n] * (d2g[j][l][m][n] + d2g[j][m][l][n] - d2g[j][n][l][m]); } dG[j][k][l][m] = HALF * val; } // Γ^j = γ^{kl} Γ_{kl}^j for (int j = 0; j < 3; j++) { ELEM_TYPE val = ZERO; for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) val += gu[k][l] * G[j][k][l]; X[j] = val; } // ∂_j β_k for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += g[k][l] * dbeta[j][l] + dg[j][k][l] * beta[l]; dbetal[j][k] = val; } // ∂_{jk} β_l for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) val += d2g[j][k][l][m] * beta[m] + dg[k][l][m] * dbeta[j][m] + dg[j][l][m] * dbeta[k][m] + g[l][m] * d2beta[j][k][m]; d2betal[j][k][l] = val; } // K_{ij} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) K[j][k] = At[j][k] * phi2_inv + THIRD * g[j][k] * trK; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += gu[j][l] * K[l][k]; Km[j][k] = val; } // K^{ij} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += gu[j][l] * Km[k][l]; Ku[j][k] = val; } // ∂_j \dot{γ_kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { dgdot[j][k][l] = -TWO * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) + d2betal[j][k][l] + d2betal[j][l][k]; for (int m = 0; m < 3; m++) dgdot[j][k][l] += -TWO * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]); } // \dot{γ^{jk}} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { gudot[j][k] = TWO * alpha * Ku[j][k]; for (int l = 0; l < 3; l++) { gudot[j][k] += -gu[j][l] * dbeta[l][k] - gu[k][l] * dbeta[l][j]; for (int m = 0; m < 3; m++) gudot[j][k] += -gu[j][l] * G[k][l][m] * beta[m] - gu[k][l] * G[j][l][m] * beta[m]; } } // \dot{Γ}^j_{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) val += gudot[j][m] * (dg [k][l][m] + dg [l][k][m] - dg [m][k][l]) + gu [j][m] * (dgdot[k][l][m] + dgdot[l][k][m] - dgdot[m][k][l]); Gdot[j][k][l] = HALF * val; } Gammadot_term = ZERO; for (int j = 0; j < 3; j++) { ELEM_TYPE val = ZERO; for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { val += gu[k][l] * Gdot[j][k][l]; } Gammadot_term += val * dalpha[j]; } for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += G[l][j][k] * dalpha[l]; D2alpha[j][k] = d2alpha[j][k] - val; } Kij_Dij_alpha = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { Kij_Dij_alpha += Ku[j][k] * D2alpha[j][k]; } k3 = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += Km[k][l] * Km[l][j]; k3 += val * Km[j][k]; } // K_{ij} K^{ij} k2 = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) k2 += Km[j][k] * Km[k][j]; // Ric_{jk} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int m = 0; m < 3; m++) val += dG[m][m][j][k] - dG[k][m][j][m]; for (int m = 0; m < 3; m++) for (int l = 0; l < 3; l++) val += G[l][l][m] * G[m][j][k] - G[l][k][m] * G[m][j][l]; Ric[j][k] = val; } // Ric^j_k for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += gu[j][l] * Ric[l][k]; Ricm[j][k] = val; } for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { ELEM_TYPE val = ZERO; for (int l = 0; l < 3; l++) val += K[j][l] * Km[l][k]; Kdot[j][k] = -D2alpha[j][k] + alpha * (Ric[j][k] + trK * K[j][k] - TWO * val); } k_kdot = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { //k_kdot += kdot[j][k] * Ku[j][k]; k_kdot += Kdot[j][k] * Ku[j][k]; } beta_term = ZERO; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { ELEM_TYPE D2beta = ZERO; D2beta += d2beta[j][k][l]; for (int m = 0; m < 3; m++) { D2beta += dG[j][l][k][m] * beta[m] + G[l][k][m] * dbeta[j][m] + G[l][j][m] * dbeta[k][m] - G[m][j][k] * dbeta[m][l]; for (int n = 0; n < 3; n++) D2beta += G[l][j][m] * G[m][k][n] * beta[n] - G[m][j][k] * G[l][m][n] * beta[n]; } beta_term += -gu[j][k] * D2beta * dalpha[l]; } for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) beta_term += -beta[k] * Ricm[j][k] * dalpha[j]; STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data, idx_dc, gu[0][0] + AXIS_VAL(gu[1][1], ZERO)); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data, idx_dc, gu[2][2]); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data, idx_dc, TWO * gu[0][2]); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data, idx_dc, -X[0] + AXIS_VAL(ZERO, gu[1][1] * xinv)); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data, idx_dc, -X[2]); STORE(solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data, idx_dc, -k2); STORE(solver->rhs, idx_rhs, -TWO * alpha * Kij_Dij_alpha + Gammadot_term + TWO * alpha * (k_kdot + TWO * alpha * k3) + beta_term); #ifdef ELEM_SCALAR if (on_axis) { solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; } #endif } } #undef AXIS_VAL #undef FUNC #undef FUNC2 #undef JOIN2 #undef LOAD #undef STORE #undef ZERO #undef ONE #undef TWO #undef SIX #undef EIGHT #undef TWELVE #undef C16 #undef C30 #undef HALF #undef THIRD #undef C1_144 #undef ELEM_SPLAT #undef ELEM_SIZE #undef ELEM_TYPE #undef TYPE