From 06a361f08cdd07a3bcc8030c9dfe790a84b948b1 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 27 Aug 2022 16:39:18 +0200 Subject: fill_eq_coeffs(): move the per-line kernel into a separate file Prepare for templatizing it for vector versions. --- src/fill_eq_coeffs_template.c | 664 ++++++++++++++++++++++++++++++++++++++++++ src/qms.c | 659 +---------------------------------------- 2 files changed, 667 insertions(+), 656 deletions(-) create mode 100644 src/fill_eq_coeffs_template.c diff --git a/src/fill_eq_coeffs_template.c b/src/fill_eq_coeffs_template.c new file mode 100644 index 0000000..ae8fcd8 --- /dev/null +++ b/src/fill_eq_coeffs_template.c @@ -0,0 +1,664 @@ + +#define LOAD(arr, idx) (arr[idx]) + +static inline double fd1_4(const double *arr, const ptrdiff_t stride, const double h_inv) +{ + const double ap2 = LOAD(arr, 2 * stride); + const double ap1 = LOAD(arr, 1 * stride); + const double am1 = LOAD(arr, -1 * stride); + const double am2 = LOAD(arr, -2 * stride); + return (-ap2 + 8.0 * ap1 - 8.0 * am1 + am2) * h_inv / 12.0; +} + +static inline double fd2_4(const double *arr, const ptrdiff_t stride, const double h_inv) +{ + const double ap2 = LOAD(arr, 2 * stride); + const double ap1 = LOAD(arr, 1 * stride); + const double a0 = LOAD(arr, 0 * stride); + const double am1 = LOAD(arr, -1 * stride); + const double am2 = LOAD(arr, -2 * stride); + return (-ap2 + 16.0 * ap1 - 30.0 * a0 + 16.0 * am1 - am2) * SQR(h_inv) / 12.0; +} + +static inline double fd11_4(const double *arr, ptrdiff_t stride_x, ptrdiff_t stride_z, + double dx_inv, double dz_inv) +{ + const double ap2p2 = LOAD(arr, 2 * stride_x + 2 * stride_z); + const double ap2p1 = LOAD(arr, 2 * stride_x + 1 * stride_z); + const double ap2m1 = LOAD(arr, 2 * stride_x - 1 * stride_z); + const double ap2m2 = LOAD(arr, 2 * stride_x - 2 * stride_z); + + const double ap1p2 = LOAD(arr, 1 * stride_x + 2 * stride_z); + const double ap1p1 = LOAD(arr, 1 * stride_x + 1 * stride_z); + const double ap1m1 = LOAD(arr, 1 * stride_x - 1 * stride_z); + const double ap1m2 = LOAD(arr, 1 * stride_x - 2 * stride_z); + + const double am1p2 = LOAD(arr, -1 * stride_x + 2 * stride_z); + const double am1p1 = LOAD(arr, -1 * stride_x + 1 * stride_z); + const double am1m1 = LOAD(arr, -1 * stride_x - 1 * stride_z); + const double am1m2 = LOAD(arr, -1 * stride_x - 2 * stride_z); + + const double am2p2 = LOAD(arr, -2 * stride_x + 2 * stride_z); + const double am2p1 = LOAD(arr, -2 * stride_x + 1 * stride_z); + const double am2m1 = LOAD(arr, -2 * stride_x - 1 * stride_z); + const double am2m2 = LOAD(arr, -2 * stride_x - 2 * stride_z); + + return (-1.0 * (-1.0 * ap2p2 + 8.0 * ap1p2 - 8.0 * am1p2 + 1.0 * am2p2) + +8.0 * (-1.0 * ap2p1 + 8.0 * ap1p1 - 8.0 * am1p1 + 1.0 * am2p1) + -8.0 * (-1.0 * ap2m1 + 8.0 * ap1m1 - 8.0 * am1m1 + 1.0 * am2m1) + +1.0 * (-1.0 * ap2m2 + 8.0 * ap1m2 - 8.0 * am1m2 + 1.0 * am2m2)) * dx_inv * dz_inv / 144.0; +} + +#undef LOAD + +#if 0 +#define FD8(arr, stride, h_inv) \ + ((-3.0 * arr[4 * stride] + 32.0 * arr[3 * stride] - 168.0 * arr[2 * stride] + 672.0 * arr[1 * stride] \ + - 672.0 * arr[-1 * stride] + 168.0 * arr[-2 * stride] - 32.0 * arr[-3 * stride] + 3.0 * arr[-4 * stride]) * h_inv / 840.0) +#define FD28(arr, stride, h_inv) \ + ((-9.0 * arr[ 4 * stride] + 128.0 * arr[ 3 * stride] - 1008.0 * arr[ 2 * stride] + 8064.0 * arr[ 1 * stride] \ + -9.0 * arr[-4 * stride] + 128.0 * arr[-3 * stride] - 1008.0 * arr[-2 * stride] + 8064.0 * arr[-1 * stride] \ + - 14350.0 * arr[0]) * SQR(h_inv) / 5040.0) + +static double diff8_dx(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD8(arr, 1, h_inv); +} +static double diff8_dz(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD8(arr, stride, h_inv); +} + +static double diff8_dxx(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD28(arr, 1, h_inv); +} +static double diff8_dzz(const double *arr, ptrdiff_t stride, double h_inv) +{ + return FD28(arr, stride, h_inv); +} +static double diff8_dxz(const double *arr, ptrdiff_t stride, double dx_inv, double dz_inv) +{ + static const double fd_coeffs[] = { 3.0, -32.0, 168.0, -672.0, 0.0, 672.0, -168.0, 32.0, -3.0 }; + static const int stencil = ARRAY_ELEMS(fd_coeffs) / 2; + double val = 0.0; + for (int j = -stencil; j <= stencil; j++) { + double tmp = 0.0; + for (int i = -stencil; i <= stencil; i++) + tmp += fd_coeffs[i + stencil] * arr[j * stride + i]; + val += fd_coeffs[j + stencil] * tmp; + } + return val * dx_inv * dz_inv / SQR(840.0); +} + +#define diff1 fd1_8 +#define diff2 fd2_8 +#define diff11 fd11_8 +#else +#define diff1 fd1_4 +#define diff2 fd2_4 +#define diff11 fd11_4 +#endif + +#define diff_dx(arr, stride, dx_inv, dz_inv) (diff1 (arr, 1, dx_inv)) +#define diff_dz(arr, stride, dx_inv, dz_inv) (diff1 (arr, stride, dz_inv)) +#define diff_dxx(arr, stride, dx_inv, dz_inv) (diff2 (arr, 1, dx_inv)) +#define diff_dzz(arr, stride, dx_inv, dz_inv) (diff2 (arr, stride, dz_inv)) +#define diff_dxz(arr, stride, dx_inv, dz_inv) (diff11(arr, 1, stride, dx_inv, dz_inv)) + +static void fill_eq_coeffs_line(const cGH *gh, const CoordPatch *cp, MG2DContext *solver, + const double * const vars[], const int idx_z, + const ptrdiff_t stride_z, const double dx_inv, const double dz_inv) +{ + for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { + const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); + const int idx_dc = idx_z * solver->diff_coeffs[0]->stride + idx_x; + const int idx_rhs = idx_z * solver->rhs_stride + idx_x; + + const double x = vars[RHS_VAR_X][idx_src]; + const int on_axis = fabs(x) < 1e-13; + + double K[3][3], Km[3][3], Ku[3][3], dK[3][3][3]; + double gtu[3][3], g[3][3], gu[3][3]; + double dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3]; + double G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3]; + double Ric[3][3], Ricm[3][3]; + double dgdot[3][3][3]; + double D2alpha[3][3]; + double Kdot[3][3]; + double k2, Kij_Dij_alpha, k_kdot, k3; + double Gammadot_term, beta_term; + + double betal[3], dbetal[3][3], d2betal[3][3][3]; + +#define LOAD_VAR(var) ( vars[RHS_VAR_ ## var][idx_src]) +#define DX(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv )) +#define DZ(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) +#define DXX(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv)) +#define DZZ(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) +#define DXZ(var) (diff11(&vars[RHS_VAR_ ## var][idx_src], 1, stride_z, dx_inv, dz_inv)) + + const double gtxx = LOAD_VAR(GTXX); + const double gtyy = LOAD_VAR(GTYY); + const double gtzz = LOAD_VAR(GTZZ); + const double gtxy = LOAD_VAR(GTXY); + const double gtxz = LOAD_VAR(GTXZ); + const double gtyz = LOAD_VAR(GTYZ); + + const double gt[3][3] = {{ gtxx, gtxy, gtxz }, + { gtxy, gtyy, gtyz }, + { gtxz, gtyz, gtzz }}; + + const double dx_gt11 = DX(GTXX); + const double dx_gt22 = DX(GTYY); + const double dx_gt33 = DX(GTZZ); + const double dx_gt13 = DX(GTXZ); + + const double dz_gt11 = DZ(GTXX); + const double dz_gt22 = DZ(GTYY); + const double dz_gt33 = DZ(GTZZ); + const double dz_gt13 = DZ(GTXZ); + + const double dgt[3][3][3] = { + { + { dx_gt11, 0.0, dx_gt13 }, + { 0.0, dx_gt22, 0.0 }, + { dx_gt13, 0.0, dx_gt33 }, + }, + { + { 0.0, on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0 }, + { on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0, on_axis ? dx_gt13 : gtxz / x }, + { 0.0, on_axis ? dx_gt13 : gtxz / x, 0.0 }, + }, + { + { dz_gt11, 0.0, dz_gt13 }, + { 0.0, dz_gt22, 0.0 }, + { dz_gt13, 0.0, dz_gt33 }, + }, + }; + + const double dxx_gt11 = DXX(GTXX); + const double dxx_gt22 = DXX(GTYY); + const double dxx_gt33 = DXX(GTZZ); + const double dxx_gt13 = DXX(GTXZ); + + const double dxz_gt11 = DXZ(GTXX); + const double dxz_gt22 = DXZ(GTYY); + const double dxz_gt33 = DXZ(GTZZ); + const double dxz_gt13 = DXZ(GTXZ); + + const double dzz_gt11 = DZZ(GTXX); + const double dzz_gt22 = DZZ(GTYY); + const double dzz_gt33 = DZZ(GTZZ); + const double dzz_gt13 = DZZ(GTXZ); + + const double d2gt[3][3][3][3] = { + { + { + { dxx_gt11, 0.0, dxx_gt13 }, + { 0.0, dxx_gt22, 0.0 }, + { dxx_gt13, 0.0, dxx_gt33 }, + }, + { + { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, + { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, + on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, + { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, + }, + { + { dxz_gt11, 0.0, dxz_gt13 }, + { 0.0, dxz_gt22, 0.0 }, + { dxz_gt13, 0.0, dxz_gt33 }, + }, + + }, + { + { + { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, + { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, + on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, + { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, + }, + { + { on_axis ? dxx_gt22 : dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x), 0.0, + on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, + { 0.0, on_axis ? dxx_gt11 : dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x), 0.0 }, + { on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0, on_axis ? dxx_gt33 : dx_gt33 / x }, + }, + { + { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, + { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, + on_axis ? dxz_gt13 : dz_gt13 / x }, + { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, + }, + + }, + { + { + { dxz_gt11, 0.0, dxz_gt13 }, + { 0.0, dxz_gt22, 0.0 }, + { dxz_gt13, 0.0, dxz_gt33 }, + }, + { + { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, + { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, + on_axis ? dxz_gt13 : dz_gt13 / x }, + { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, + }, + { + { dzz_gt11, 0.0, dzz_gt13 }, + { 0.0, dzz_gt22, 0.0 }, + { dzz_gt13, 0.0, dzz_gt33 }, + }, + + }, + }; + + const double Atxx = LOAD_VAR(ATXX); + const double Atyy = LOAD_VAR(ATYY); + const double Atzz = LOAD_VAR(ATZZ); + const double Atxy = LOAD_VAR(ATXY); + const double Atxz = LOAD_VAR(ATXZ); + const double Atyz = LOAD_VAR(ATYZ); + + const double At[3][3] = { + { Atxx, Atxy, Atxz }, + { Atxy, Atyy, Atyz }, + { Atxz, Atyz, Atzz }}; + + const double dx_At11 = DX(ATXX); + const double dx_At22 = DX(ATYY); + const double dx_At33 = DX(ATZZ); + const double dx_At13 = DX(ATXZ); + + const double dz_At11 = DZ(ATXX); + const double dz_At22 = DZ(ATYY); + const double dz_At33 = DZ(ATZZ); + const double dz_At13 = DZ(ATXZ); + + const double dAt[3][3][3] = { + { + { dx_At11, 0.0, dx_At13 }, + { 0.0, dx_At22, 0.0 }, + { dx_At13, 0.0, dx_At33 }, + }, + { + { 0.0, on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0 }, + { on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0, on_axis ? dx_At13 : Atxz / x }, + { 0.0, on_axis ? dx_At13 : Atxz / x, 0.0 }, + }, + { + { dz_At11, 0.0, dz_At13 }, + { 0.0, dz_At22, 0.0 }, + { dz_At13, 0.0, dz_At33 }, + }, + }; + + const double phi = LOAD_VAR(PHI); + const double dphi[3] = { DX(PHI), 0.0, DZ(PHI) }; + + const double dxx_phi = DXX(PHI); + const double dzz_phi = DZZ(PHI); + const double dxz_phi = DXZ(PHI); + + const double d2phi[3][3] = { + { dxx_phi, 0.0, dxz_phi }, + { 0.0, on_axis ? dxx_phi : dphi[0] / x, 0.0 }, + { dxz_phi, 0.0, dzz_phi }, + }; + + const double trK = LOAD_VAR(TRK); + const double dtrK[3] = { DX(TRK), 0.0, DZ(TRK) }; + + const double Xtx = LOAD_VAR(XTX); + const double Xtz = LOAD_VAR(XTZ); + + const double alpha = LOAD_VAR(ALPHA); + const double dx_alpha = DX(ALPHA); + const double dz_alpha = DZ(ALPHA); + + const double dalpha[3] = { dx_alpha, 0.0, dz_alpha }; + + const double dxx_alpha = DXX(ALPHA); + const double dzz_alpha = DZZ(ALPHA); + const double dxz_alpha = DXZ(ALPHA); + + const double d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha }, + { 0, on_axis ? dxx_alpha : dx_alpha / x, 0 }, + { dxz_alpha, 0, dzz_alpha }}; + + const double betax = LOAD_VAR(BETAX); + const double betaz = LOAD_VAR(BETAX); + + const double beta[3] = { betax, 0.0, betaz }; + + const double dx_betax = DX(BETAX); + const double dz_betax = DZ(BETAX); + + const double dx_betaz = DX(BETAZ); + const double dz_betaz = DZ(BETAZ); + + const double dbeta[3][3] = {{ dx_betax, 0.0, dx_betaz }, + { 0.0, on_axis ? dx_betax : betax / x, 0.0 }, + { dz_betax, 0.0, dz_betaz }}; + + const double dxx_betax = DXX(BETAX); + const double dxz_betax = DXZ(BETAX); + const double dzz_betax = DZZ(BETAX); + + const double dxx_betaz = DXX(BETAZ); + const double dxz_betaz = DXZ(BETAZ); + const double dzz_betaz = DZZ(BETAZ); + +#undef LOAD_VAR +#undef DX +#undef DZ +#undef DXX +#undef DZZ +#undef DXZ + + const double d2beta[3][3][3] = { + { + { dxx_betax, 0.0, dxx_betaz }, + { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, + { dxz_betax, 0.0, dxz_betaz }, + }, + { + { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, + { on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0, on_axis ? dxx_betaz : dx_betaz / x }, + { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, + }, + { + { dxz_betax, 0.0, dxz_betaz }, + { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, + { dzz_betax, 0.0, dzz_betaz }, + }, + }; + + const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); + + // \tilde{γ}^{ij} + gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det; + gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det; + gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det; + gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det; + gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det; + gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det; + gtu[1][0] = gtu[0][1]; + gtu[2][0] = gtu[0][2]; + gtu[2][1] = gtu[1][2]; + + // γ_{jk}/^{jk} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + gu[j][k] = SQR(phi) * gtu[j][k]; + g[j][k] = gt[j][k] / SQR(phi); + } + + // β_j + for (int j = 0; j < 3; j++) { + double val = 0.0; + for (int k = 0; k < 3; k++) + val += g[j][k] * beta[k]; + betal[j] = val; + } + + // ∂_j γ_{kl} + // ∂_j K_{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + dg[j][k][l] = -2.0 * dphi[j] * gt[k][l] / (phi * SQR(phi)) + dgt[j][k][l] / SQR(phi); + dK[j][k][l] = -2.0 * dphi[j] * At[k][l] / (phi * SQR(phi)) + dAt[j][k][l] / SQR(phi) + + (1.0 / 3.0) * (dtrK[j] * g[k][l] + trK * dg[j][k][l]); + } + + // ∂_{jk} g_{lm} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + for (int m = 0; m < 3; m++) { + d2g[j][k][l][m] = 6.0 * gt [l][m] * dphi[j] * dphi[k] / SQR(SQR(phi)) - + 2.0 * gt [l][m] * d2phi[j][k] / (phi * SQR(phi)) - + 2.0 * dgt [j][l][m] * dphi[k] / (phi * SQR(phi)) - + 2.0 * dgt [k][l][m] * dphi[j] / (phi * SQR(phi)) + + d2gt[j][k][l][m] / SQR(phi); + } + + // ∂_j γ^{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + for (int n = 0; n < 3; n++) + val += -gu[k][m] * gu[l][n] * dg[j][m][n]; + dgu[j][k][l] = val; + } + + // Γ^j_{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += 0.5 * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]); + G[j][k][l] = val; + } + + // ∂_j Γ^k_{lm} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + for (int m = 0; m < 3; m++) { + double val = 0.0; + for (int n = 0; n < 3; n++) { + val += dgu[j][k][n] * (dg [l][m][n] + dg [m][l][n] - dg [n][l][m]) + + gu [k][n] * (d2g[j][l][m][n] + d2g[j][m][l][n] - d2g[j][n][l][m]); + } + dG[j][k][l][m] = 0.5 * val; + } + + // Γ^j = γ^{kl} Γ_{kl}^j + for (int j = 0; j < 3; j++) { + double val = 0.0; + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + val += gu[k][l] * G[j][k][l]; + X[j] = val; + } + + // ∂_j β_k + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += g[k][l] * dbeta[j][l] + dg[j][k][l] * beta[l]; + dbetal[j][k] = val; + } + + // ∂_{jk} β_l + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += d2g[j][k][l][m] * beta[m] + dg[k][l][m] * dbeta[j][m] + dg[j][l][m] * dbeta[k][m] + + g[l][m] * d2beta[j][k][m]; + d2betal[j][k][l] = val; + } + + // K_{ij} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + K[j][k] = At[j][k] / SQR(phi) + (1.0 / 3.0) * g[j][k] * trK; + + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += gu[j][l] * K[l][k]; + Km[j][k] = val; + } + + // K^{ij} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += gu[j][l] * Km[k][l]; + Ku[j][k] = val; + } + + // ∂_j \dot{γ_kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + dgdot[j][k][l] = -2.0 * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) + + d2betal[j][k][l] + d2betal[j][l][k]; + for (int m = 0; m < 3; m++) + dgdot[j][k][l] += -2.0 * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]); + } + + // \dot{γ^{jk}} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + gudot[j][k] = 2.0 * alpha * Ku[j][k]; + + for (int l = 0; l < 3; l++) { + gudot[j][k] += -gu[j][l] * dbeta[l][k] - gu[k][l] * dbeta[l][j]; + + for (int m = 0; m < 3; m++) + gudot[j][k] += -gu[j][l] * G[k][l][m] * beta[m] - gu[k][l] * G[j][l][m] * beta[m]; + } + } + + + // \dot{Γ}^j_{kl} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += gudot[j][m] * (dg [k][l][m] + dg [l][k][m] - dg [m][k][l]) + + gu [j][m] * (dgdot[k][l][m] + dgdot[l][k][m] - dgdot[m][k][l]); + + Gdot[j][k][l] = 0.5 * val; + } + + Gammadot_term = 0.0; + for (int j = 0; j < 3; j++) { + double val = 0.0; + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + val += gu[k][l] * Gdot[j][k][l]; + } + + Gammadot_term += val * dalpha[j]; + } + + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += G[l][j][k] * dalpha[l]; + + D2alpha[j][k] = d2alpha[j][k] - val; + } + + Kij_Dij_alpha = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + Kij_Dij_alpha += Ku[j][k] * D2alpha[j][k]; + } + + k3 = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += Km[k][l] * Km[l][j]; + k3 += val * Km[j][k]; + } + + // K_{ij} K^{ij} + k2 = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + k2 += Km[j][k] * Km[k][j]; + + // Ric_{jk} + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int m = 0; m < 3; m++) + val += dG[m][m][j][k] - dG[k][m][j][m]; + for (int m = 0; m < 3; m++) + for (int l = 0; l < 3; l++) + val += G[l][l][m] * G[m][j][k] - G[l][k][m] * G[m][j][l]; + Ric[j][k] = val; + } + + // Ric^j_k + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += gu[j][l] * Ric[l][k]; + Ricm[j][k] = val; + } + + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + double val = 0.0; + for (int l = 0; l < 3; l++) + val += K[j][l] * Km[l][k]; + + Kdot[j][k] = -D2alpha[j][k] + alpha * (Ric[j][k] + trK * K[j][k] - 2 * val); + } + + k_kdot = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) { + //k_kdot += kdot[j][k] * Ku[j][k]; + k_kdot += Kdot[j][k] * Ku[j][k]; + } + + + beta_term = 0.0; + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) { + double D2beta = 0.0; + + D2beta += d2beta[j][k][l]; + + for (int m = 0; m < 3; m++) { + D2beta += dG[j][l][k][m] * beta[m] + G[l][k][m] * dbeta[j][m] + G[l][j][m] * dbeta[k][m] - G[m][j][k] * dbeta[m][l]; + + for (int n = 0; n < 3; n++) + D2beta += G[l][j][m] * G[m][k][n] * beta[n] - G[m][j][k] * G[l][m][n] * beta[n]; + } + + beta_term += -gu[j][k] * D2beta * dalpha[l]; + } + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + beta_term += -beta[k] * Ricm[j][k] * dalpha[j]; + + solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_dc] = gu[0][0] + (on_axis ? gu[1][1] : 0.0); + solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_dc] = gu[2][2]; + solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_dc] = 2.0 * gu[0][2]; + solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_dc] = -X[0] + (on_axis ? 0.0 : gu[1][1] / x); + solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_dc] = -X[2]; + solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_dc] = -k2; + + solver->rhs[idx_rhs] = -2.0 * alpha * Kij_Dij_alpha + Gammadot_term + + 2.0 * alpha * (k_kdot + 2.0 * alpha * k3) + beta_term; + + if (on_axis) { + solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; + solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; + } + } +} diff --git a/src/qms.c b/src/qms.c index 30e1c73..8fae7f0 100644 --- a/src/qms.c +++ b/src/qms.c @@ -494,111 +494,6 @@ static void print_stats(QMSMGContext *ms) ms->log_level = orig_log_level; } -#define LOAD(arr, idx) (arr[idx]) - -static inline double fd1_4(const double *arr, const ptrdiff_t stride, const double h_inv) -{ - const double ap2 = LOAD(arr, 2 * stride); - const double ap1 = LOAD(arr, 1 * stride); - const double am1 = LOAD(arr, -1 * stride); - const double am2 = LOAD(arr, -2 * stride); - return (-ap2 + 8.0 * ap1 - 8.0 * am1 + am2) * h_inv / 12.0; -} - -static inline double fd2_4(const double *arr, const ptrdiff_t stride, const double h_inv) -{ - const double ap2 = LOAD(arr, 2 * stride); - const double ap1 = LOAD(arr, 1 * stride); - const double a0 = LOAD(arr, 0 * stride); - const double am1 = LOAD(arr, -1 * stride); - const double am2 = LOAD(arr, -2 * stride); - return (-ap2 + 16.0 * ap1 - 30.0 * a0 + 16.0 * am1 - am2) * SQR(h_inv) / 12.0; -} - -static inline double fd11_4(const double *arr, ptrdiff_t stride_x, ptrdiff_t stride_z, - double dx_inv, double dz_inv) -{ - const double ap2p2 = LOAD(arr, 2 * stride_x + 2 * stride_z); - const double ap2p1 = LOAD(arr, 2 * stride_x + 1 * stride_z); - const double ap2m1 = LOAD(arr, 2 * stride_x - 1 * stride_z); - const double ap2m2 = LOAD(arr, 2 * stride_x - 2 * stride_z); - - const double ap1p2 = LOAD(arr, 1 * stride_x + 2 * stride_z); - const double ap1p1 = LOAD(arr, 1 * stride_x + 1 * stride_z); - const double ap1m1 = LOAD(arr, 1 * stride_x - 1 * stride_z); - const double ap1m2 = LOAD(arr, 1 * stride_x - 2 * stride_z); - - const double am1p2 = LOAD(arr, -1 * stride_x + 2 * stride_z); - const double am1p1 = LOAD(arr, -1 * stride_x + 1 * stride_z); - const double am1m1 = LOAD(arr, -1 * stride_x - 1 * stride_z); - const double am1m2 = LOAD(arr, -1 * stride_x - 2 * stride_z); - - const double am2p2 = LOAD(arr, -2 * stride_x + 2 * stride_z); - const double am2p1 = LOAD(arr, -2 * stride_x + 1 * stride_z); - const double am2m1 = LOAD(arr, -2 * stride_x - 1 * stride_z); - const double am2m2 = LOAD(arr, -2 * stride_x - 2 * stride_z); - - return (-1.0 * (-1.0 * ap2p2 + 8.0 * ap1p2 - 8.0 * am1p2 + 1.0 * am2p2) - +8.0 * (-1.0 * ap2p1 + 8.0 * ap1p1 - 8.0 * am1p1 + 1.0 * am2p1) - -8.0 * (-1.0 * ap2m1 + 8.0 * ap1m1 - 8.0 * am1m1 + 1.0 * am2m1) - +1.0 * (-1.0 * ap2m2 + 8.0 * ap1m2 - 8.0 * am1m2 + 1.0 * am2m2)) * dx_inv * dz_inv / 144.0; -} - -#define FD8(arr, stride, h_inv) \ - ((-3.0 * arr[4 * stride] + 32.0 * arr[3 * stride] - 168.0 * arr[2 * stride] + 672.0 * arr[1 * stride] \ - - 672.0 * arr[-1 * stride] + 168.0 * arr[-2 * stride] - 32.0 * arr[-3 * stride] + 3.0 * arr[-4 * stride]) * h_inv / 840.0) -#define FD28(arr, stride, h_inv) \ - ((-9.0 * arr[ 4 * stride] + 128.0 * arr[ 3 * stride] - 1008.0 * arr[ 2 * stride] + 8064.0 * arr[ 1 * stride] \ - -9.0 * arr[-4 * stride] + 128.0 * arr[-3 * stride] - 1008.0 * arr[-2 * stride] + 8064.0 * arr[-1 * stride] \ - - 14350.0 * arr[0]) * SQR(h_inv) / 5040.0) - -static double diff8_dx(const double *arr, ptrdiff_t stride, double h_inv) -{ - return FD8(arr, 1, h_inv); -} -static double diff8_dz(const double *arr, ptrdiff_t stride, double h_inv) -{ - return FD8(arr, stride, h_inv); -} - -static double diff8_dxx(const double *arr, ptrdiff_t stride, double h_inv) -{ - return FD28(arr, 1, h_inv); -} -static double diff8_dzz(const double *arr, ptrdiff_t stride, double h_inv) -{ - return FD28(arr, stride, h_inv); -} -static double diff8_dxz(const double *arr, ptrdiff_t stride, double dx_inv, double dz_inv) -{ - static const double fd_coeffs[] = { 3.0, -32.0, 168.0, -672.0, 0.0, 672.0, -168.0, 32.0, -3.0 }; - static const int stencil = ARRAY_ELEMS(fd_coeffs) / 2; - double val = 0.0; - for (int j = -stencil; j <= stencil; j++) { - double tmp = 0.0; - for (int i = -stencil; i <= stencil; i++) - tmp += fd_coeffs[i + stencil] * arr[j * stride + i]; - val += fd_coeffs[j + stencil] * tmp; - } - return val * dx_inv * dz_inv / SQR(840.0); -} - -#if 0 -#define diff1 fd1_8 -#define diff2 fd2_8 -#define diff11 fd11_8 -#else -#define diff1 fd1_4 -#define diff2 fd2_4 -#define diff11 fd11_4 -#endif - -#define diff_dx(arr, stride, dx_inv, dz_inv) (diff1 (arr, 1, dx_inv)) -#define diff_dz(arr, stride, dx_inv, dz_inv) (diff1 (arr, stride, dz_inv)) -#define diff_dxx(arr, stride, dx_inv, dz_inv) (diff2 (arr, 1, dx_inv)) -#define diff_dzz(arr, stride, dx_inv, dz_inv) (diff2 (arr, stride, dz_inv)) -#define diff_dxz(arr, stride, dx_inv, dz_inv) (diff11(arr, 1, stride, dx_inv, dz_inv)) - enum RHSVar { RHS_VAR_GTXX, RHS_VAR_GTYY, @@ -633,6 +528,8 @@ enum RHSVar { RHS_VAR_NB, }; +#include "fill_eq_coeffs_template.c" + static void fill_eq_coeffs(QMSMGContext *ctx, CoordPatch *cp, MG2DContext *solver) { cGH *gh = ctx->gh; @@ -676,557 +573,7 @@ static void fill_eq_coeffs(QMSMGContext *ctx, CoordPatch *cp, MG2DContext *solve #pragma omp parallel for for (int idx_z = 0; idx_z < solver->local_size[1]; idx_z++) - for (int idx_x = 0; idx_x < solver->local_size[0]; idx_x++) { - const int idx_src = CCTK_GFINDEX3D(gh, idx_x + cp->offset_left[0], cp->y_idx, idx_z + cp->offset_left[1]); - const int idx_dc = idx_z * solver->diff_coeffs[0]->stride + idx_x; - const int idx_rhs = idx_z * solver->rhs_stride + idx_x; - - const double x = vars[RHS_VAR_X][idx_src]; - const int on_axis = fabs(x) < 1e-13; - - double K[3][3], Km[3][3], Ku[3][3], dK[3][3][3]; - double gtu[3][3], g[3][3], gu[3][3]; - double dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3]; - double G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3]; - double Ric[3][3], Ricm[3][3]; - double dgdot[3][3][3]; - double D2alpha[3][3]; - double Kdot[3][3]; - double k2, Kij_Dij_alpha, k_kdot, k3; - double Gammadot_term, beta_term; - - double betal[3], dbetal[3][3], d2betal[3][3][3]; - -#define LOAD_VAR(var) ( vars[RHS_VAR_ ## var][idx_src]) -#define DX(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv )) -#define DZ(var) (diff1 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) -#define DXX(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], 1, dx_inv)) -#define DZZ(var) (diff2 (&vars[RHS_VAR_ ## var][idx_src], stride_z, dz_inv)) -#define DXZ(var) (diff11(&vars[RHS_VAR_ ## var][idx_src], 1, stride_z, dx_inv, dz_inv)) - - const double gtxx = LOAD_VAR(GTXX); - const double gtyy = LOAD_VAR(GTYY); - const double gtzz = LOAD_VAR(GTZZ); - const double gtxy = LOAD_VAR(GTXY); - const double gtxz = LOAD_VAR(GTXZ); - const double gtyz = LOAD_VAR(GTYZ); - - const double gt[3][3] = {{ gtxx, gtxy, gtxz }, - { gtxy, gtyy, gtyz }, - { gtxz, gtyz, gtzz }}; - - const double dx_gt11 = DX(GTXX); - const double dx_gt22 = DX(GTYY); - const double dx_gt33 = DX(GTZZ); - const double dx_gt13 = DX(GTXZ); - - const double dz_gt11 = DZ(GTXX); - const double dz_gt22 = DZ(GTYY); - const double dz_gt33 = DZ(GTZZ); - const double dz_gt13 = DZ(GTXZ); - - const double dgt[3][3][3] = { - { - { dx_gt11, 0.0, dx_gt13 }, - { 0.0, dx_gt22, 0.0 }, - { dx_gt13, 0.0, dx_gt33 }, - }, - { - { 0.0, on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0 }, - { on_axis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0, on_axis ? dx_gt13 : gtxz / x }, - { 0.0, on_axis ? dx_gt13 : gtxz / x, 0.0 }, - }, - { - { dz_gt11, 0.0, dz_gt13 }, - { 0.0, dz_gt22, 0.0 }, - { dz_gt13, 0.0, dz_gt33 }, - }, - }; - - const double dxx_gt11 = DXX(GTXX); - const double dxx_gt22 = DXX(GTYY); - const double dxx_gt33 = DXX(GTZZ); - const double dxx_gt13 = DXX(GTXZ); - - const double dxz_gt11 = DXZ(GTXX); - const double dxz_gt22 = DXZ(GTYY); - const double dxz_gt33 = DXZ(GTZZ); - const double dxz_gt13 = DXZ(GTXZ); - - const double dzz_gt11 = DZZ(GTXX); - const double dzz_gt22 = DZZ(GTYY); - const double dzz_gt33 = DZZ(GTZZ); - const double dzz_gt13 = DZZ(GTXZ); - - const double d2gt[3][3][3][3] = { - { - { - { dxx_gt11, 0.0, dxx_gt13 }, - { 0.0, dxx_gt22, 0.0 }, - { dxx_gt13, 0.0, dxx_gt33 }, - }, - { - { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, - { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, - on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, - { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, - }, - { - { dxz_gt11, 0.0, dxz_gt13 }, - { 0.0, dxz_gt22, 0.0 }, - { dxz_gt13, 0.0, dxz_gt33 }, - }, - - }, - { - { - { 0.0, on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, - { on_axis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, - on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, - { 0.0, on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, - }, - { - { on_axis ? dxx_gt22 : dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x), 0.0, - on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, - { 0.0, on_axis ? dxx_gt11 : dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x), 0.0 }, - { on_axis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0, on_axis ? dxx_gt33 : dx_gt33 / x }, - }, - { - { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, - { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, - on_axis ? dxz_gt13 : dz_gt13 / x }, - { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, - }, - - }, - { - { - { dxz_gt11, 0.0, dxz_gt13 }, - { 0.0, dxz_gt22, 0.0 }, - { dxz_gt13, 0.0, dxz_gt33 }, - }, - { - { 0.0, on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, - { on_axis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, - on_axis ? dxz_gt13 : dz_gt13 / x }, - { 0.0, on_axis ? dxz_gt13 : dz_gt13 / x, 0.0 }, - }, - { - { dzz_gt11, 0.0, dzz_gt13 }, - { 0.0, dzz_gt22, 0.0 }, - { dzz_gt13, 0.0, dzz_gt33 }, - }, - - }, - }; - - const double Atxx = LOAD_VAR(ATXX); - const double Atyy = LOAD_VAR(ATYY); - const double Atzz = LOAD_VAR(ATZZ); - const double Atxy = LOAD_VAR(ATXY); - const double Atxz = LOAD_VAR(ATXZ); - const double Atyz = LOAD_VAR(ATYZ); - - const double At[3][3] = { - { Atxx, Atxy, Atxz }, - { Atxy, Atyy, Atyz }, - { Atxz, Atyz, Atzz }}; - - const double dx_At11 = DX(ATXX); - const double dx_At22 = DX(ATYY); - const double dx_At33 = DX(ATZZ); - const double dx_At13 = DX(ATXZ); - - const double dz_At11 = DZ(ATXX); - const double dz_At22 = DZ(ATYY); - const double dz_At33 = DZ(ATZZ); - const double dz_At13 = DZ(ATXZ); - - const double dAt[3][3][3] = { - { - { dx_At11, 0.0, dx_At13 }, - { 0.0, dx_At22, 0.0 }, - { dx_At13, 0.0, dx_At33 }, - }, - { - { 0.0, on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0 }, - { on_axis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0, on_axis ? dx_At13 : Atxz / x }, - { 0.0, on_axis ? dx_At13 : Atxz / x, 0.0 }, - }, - { - { dz_At11, 0.0, dz_At13 }, - { 0.0, dz_At22, 0.0 }, - { dz_At13, 0.0, dz_At33 }, - }, - }; - - const double phi = LOAD_VAR(PHI); - const double dphi[3] = { DX(PHI), 0.0, DZ(PHI) }; - - const double dxx_phi = DXX(PHI); - const double dzz_phi = DZZ(PHI); - const double dxz_phi = DXZ(PHI); - - const double d2phi[3][3] = { - { dxx_phi, 0.0, dxz_phi }, - { 0.0, on_axis ? dxx_phi : dphi[0] / x, 0.0 }, - { dxz_phi, 0.0, dzz_phi }, - }; - - const double trK = LOAD_VAR(TRK); - const double dtrK[3] = { DX(TRK), 0.0, DZ(TRK) }; - - const double Xtx = LOAD_VAR(XTX); - const double Xtz = LOAD_VAR(XTZ); - - const double alpha = LOAD_VAR(ALPHA); - const double dx_alpha = DX(ALPHA); - const double dz_alpha = DZ(ALPHA); - - const double dalpha[3] = { dx_alpha, 0.0, dz_alpha }; - - const double dxx_alpha = DXX(ALPHA); - const double dzz_alpha = DZZ(ALPHA); - const double dxz_alpha = DXZ(ALPHA); - - const double d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha }, - { 0, on_axis ? dxx_alpha : dx_alpha / x, 0 }, - { dxz_alpha, 0, dzz_alpha }}; - - const double betax = LOAD_VAR(BETAX); - const double betaz = LOAD_VAR(BETAX); - - const double beta[3] = { betax, 0.0, betaz }; - - const double dx_betax = DX(BETAX); - const double dz_betax = DZ(BETAX); - - const double dx_betaz = DX(BETAZ); - const double dz_betaz = DZ(BETAZ); - - const double dbeta[3][3] = {{ dx_betax, 0.0, dx_betaz }, - { 0.0, on_axis ? dx_betax : betax / x, 0.0 }, - { dz_betax, 0.0, dz_betaz }}; - - const double dxx_betax = DXX(BETAX); - const double dxz_betax = DXZ(BETAX); - const double dzz_betax = DZZ(BETAX); - - const double dxx_betaz = DXX(BETAZ); - const double dxz_betaz = DXZ(BETAZ); - const double dzz_betaz = DZZ(BETAZ); - -#undef LOAD_VAR -#undef DX -#undef DZ -#undef DXX -#undef DZZ -#undef DXZ - - const double d2beta[3][3][3] = { - { - { dxx_betax, 0.0, dxx_betaz }, - { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, - { dxz_betax, 0.0, dxz_betaz }, - }, - { - { 0.0, on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, - { on_axis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0, on_axis ? dxx_betaz : dx_betaz / x }, - { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, - }, - { - { dxz_betax, 0.0, dxz_betaz }, - { 0.0, on_axis ? dxz_betax : dz_betax / x, 0.0 }, - { dzz_betax, 0.0, dzz_betaz }, - }, - }; - - const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); - - // \tilde{γ}^{ij} - gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det; - gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det; - gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det; - gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det; - gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det; - gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det; - gtu[1][0] = gtu[0][1]; - gtu[2][0] = gtu[0][2]; - gtu[2][1] = gtu[1][2]; - - // γ_{jk}/^{jk} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - gu[j][k] = SQR(phi) * gtu[j][k]; - g[j][k] = gt[j][k] / SQR(phi); - } - - // β_j - for (int j = 0; j < 3; j++) { - double val = 0.0; - for (int k = 0; k < 3; k++) - val += g[j][k] * beta[k]; - betal[j] = val; - } - - // ∂_j γ_{kl} - // ∂_j K_{kl} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - dg[j][k][l] = -2.0 * dphi[j] * gt[k][l] / (phi * SQR(phi)) + dgt[j][k][l] / SQR(phi); - dK[j][k][l] = -2.0 * dphi[j] * At[k][l] / (phi * SQR(phi)) + dAt[j][k][l] / SQR(phi) + - (1.0 / 3.0) * (dtrK[j] * g[k][l] + trK * dg[j][k][l]); - } - - // ∂_{jk} g_{lm} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) - for (int m = 0; m < 3; m++) { - d2g[j][k][l][m] = 6.0 * gt [l][m] * dphi[j] * dphi[k] / SQR(SQR(phi)) - - 2.0 * gt [l][m] * d2phi[j][k] / (phi * SQR(phi)) - - 2.0 * dgt [j][l][m] * dphi[k] / (phi * SQR(phi)) - - 2.0 * dgt [k][l][m] * dphi[j] / (phi * SQR(phi)) + - d2gt[j][k][l][m] / SQR(phi); - } - - // ∂_j γ^{kl} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - double val = 0.0; - for (int m = 0; m < 3; m++) - for (int n = 0; n < 3; n++) - val += -gu[k][m] * gu[l][n] * dg[j][m][n]; - dgu[j][k][l] = val; - } - - // Γ^j_{kl} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - double val = 0.0; - for (int m = 0; m < 3; m++) - val += 0.5 * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]); - G[j][k][l] = val; - } - - // ∂_j Γ^k_{lm} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) - for (int m = 0; m < 3; m++) { - double val = 0.0; - for (int n = 0; n < 3; n++) { - val += dgu[j][k][n] * (dg [l][m][n] + dg [m][l][n] - dg [n][l][m]) + - gu [k][n] * (d2g[j][l][m][n] + d2g[j][m][l][n] - d2g[j][n][l][m]); - } - dG[j][k][l][m] = 0.5 * val; - } - - // Γ^j = γ^{kl} Γ_{kl}^j - for (int j = 0; j < 3; j++) { - double val = 0.0; - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) - val += gu[k][l] * G[j][k][l]; - X[j] = val; - } - - // ∂_j β_k - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int l = 0; l < 3; l++) - val += g[k][l] * dbeta[j][l] + dg[j][k][l] * beta[l]; - dbetal[j][k] = val; - } - - // ∂_{jk} β_l - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - double val = 0.0; - for (int m = 0; m < 3; m++) - val += d2g[j][k][l][m] * beta[m] + dg[k][l][m] * dbeta[j][m] + dg[j][l][m] * dbeta[k][m] + - g[l][m] * d2beta[j][k][m]; - d2betal[j][k][l] = val; - } - - // K_{ij} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - K[j][k] = At[j][k] / SQR(phi) + (1.0 / 3.0) * g[j][k] * trK; - - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int l = 0; l < 3; l++) - val += gu[j][l] * K[l][k]; - Km[j][k] = val; - } - - // K^{ij} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int l = 0; l < 3; l++) - val += gu[j][l] * Km[k][l]; - Ku[j][k] = val; - } - - // ∂_j \dot{γ_kl} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - dgdot[j][k][l] = -2.0 * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) + - d2betal[j][k][l] + d2betal[j][l][k]; - for (int m = 0; m < 3; m++) - dgdot[j][k][l] += -2.0 * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]); - } - - // \dot{γ^{jk}} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - gudot[j][k] = 2.0 * alpha * Ku[j][k]; - - for (int l = 0; l < 3; l++) { - gudot[j][k] += -gu[j][l] * dbeta[l][k] - gu[k][l] * dbeta[l][j]; - - for (int m = 0; m < 3; m++) - gudot[j][k] += -gu[j][l] * G[k][l][m] * beta[m] - gu[k][l] * G[j][l][m] * beta[m]; - } - } - - - // \dot{Γ}^j_{kl} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - double val = 0.0; - for (int m = 0; m < 3; m++) - val += gudot[j][m] * (dg [k][l][m] + dg [l][k][m] - dg [m][k][l]) + - gu [j][m] * (dgdot[k][l][m] + dgdot[l][k][m] - dgdot[m][k][l]); - - Gdot[j][k][l] = 0.5 * val; - } - - Gammadot_term = 0.0; - for (int j = 0; j < 3; j++) { - double val = 0.0; - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - val += gu[k][l] * Gdot[j][k][l]; - } - - Gammadot_term += val * dalpha[j]; - } - - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int l = 0; l < 3; l++) - val += G[l][j][k] * dalpha[l]; - - D2alpha[j][k] = d2alpha[j][k] - val; - } - - Kij_Dij_alpha = 0.0; - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - Kij_Dij_alpha += Ku[j][k] * D2alpha[j][k]; - } - - k3 = 0.0; - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int l = 0; l < 3; l++) - val += Km[k][l] * Km[l][j]; - k3 += val * Km[j][k]; - } - - // K_{ij} K^{ij} - k2 = 0.0; - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - k2 += Km[j][k] * Km[k][j]; - - // Ric_{jk} - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int m = 0; m < 3; m++) - val += dG[m][m][j][k] - dG[k][m][j][m]; - for (int m = 0; m < 3; m++) - for (int l = 0; l < 3; l++) - val += G[l][l][m] * G[m][j][k] - G[l][k][m] * G[m][j][l]; - Ric[j][k] = val; - } - - // Ric^j_k - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int l = 0; l < 3; l++) - val += gu[j][l] * Ric[l][k]; - Ricm[j][k] = val; - } - - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - double val = 0.0; - for (int l = 0; l < 3; l++) - val += K[j][l] * Km[l][k]; - - Kdot[j][k] = -D2alpha[j][k] + alpha * (Ric[j][k] + trK * K[j][k] - 2 * val); - } - - k_kdot = 0.0; - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) { - //k_kdot += kdot[j][k] * Ku[j][k]; - k_kdot += Kdot[j][k] * Ku[j][k]; - } - - - beta_term = 0.0; - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) { - double D2beta = 0.0; - - D2beta += d2beta[j][k][l]; - - for (int m = 0; m < 3; m++) { - D2beta += dG[j][l][k][m] * beta[m] + G[l][k][m] * dbeta[j][m] + G[l][j][m] * dbeta[k][m] - G[m][j][k] * dbeta[m][l]; - - for (int n = 0; n < 3; n++) - D2beta += G[l][j][m] * G[m][k][n] * beta[n] - G[m][j][k] * G[l][m][n] * beta[n]; - } - - beta_term += -gu[j][k] * D2beta * dalpha[l]; - } - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - beta_term += -beta[k] * Ricm[j][k] * dalpha[j]; - - solver->diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_dc] = gu[0][0] + (on_axis ? gu[1][1] : 0.0); - solver->diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_dc] = gu[2][2]; - solver->diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_dc] = 2.0 * gu[0][2]; - solver->diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_dc] = -X[0] + (on_axis ? 0.0 : gu[1][1] / x); - solver->diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_dc] = -X[2]; - solver->diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_dc] = -k2; - - solver->rhs[idx_rhs] = -2.0 * alpha * Kij_Dij_alpha + Gammadot_term + - 2.0 * alpha * (k_kdot + 2.0 * alpha * k3) + beta_term; - - if (on_axis) { - solver->diff_coeffs[MG2D_DIFF_COEFF_20]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; - solver->diff_coeffs[MG2D_DIFF_COEFF_10]->boundaries[MG2D_BOUNDARY_0L].val[idx_z] = gu[1][1]; - } - } + fill_eq_coeffs_line(gh, cp, solver, vars, idx_z, stride_z, dx_inv, dz_inv); } static void mirror(CoordPatch *cp, double *dst) -- cgit v1.2.3