/* * Quasimaximal slicing -- actual solver code * Copyright (C) 2016 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "common.h" #include #include #include #include #include #if HAVE_OPENCL #include #include #endif #include "cctk.h" #include "cctk_Timers.h" #include "util_Table.h" #include "basis.h" #include "pssolve.h" #include "qms_solve.h" #define NB_COEFFS(qms) (qms->nb_coeffs[0] * qms->nb_coeffs[1]) #define NB_COLLOC_POINTS(qms) (qms->nb_colloc_points[0] * qms->nb_colloc_points[1]) /* indices (in our code, not cactus structs) of the grid functions which we'll need to * interpolate on the pseudospectral grid */ enum MetricVars { GTXX = 0, GTYY, GTZZ, GTXY, GTXZ, GTYZ, PHI, ATXX, ATYY, ATZZ, ATXY, ATXZ, ATYZ, K, BETAX, BETAY, BETAZ, ALPHA, KDOT_XX, KDOT_YY, KDOT_ZZ, KDOT_XY, KDOT_XZ, KDOT_YZ, NB_METRIC_VARS, }; /* indices of the interpolated values of the above grid functions and their derivatives */ enum InterpMetricVars { I_GTXX = 0, I_GTYY, I_GTZZ, I_GTXY, I_GTXZ, I_GTYZ, I_GTXX_DX, I_GTYY_DX, I_GTZZ_DX, I_GTXZ_DX, I_GTXX_DZ, I_GTYY_DZ, I_GTZZ_DZ, I_GTXZ_DZ, I_GTXX_DXX, I_GTYY_DXX, I_GTZZ_DXX, I_GTXZ_DXX, I_GTXX_DXZ, I_GTYY_DXZ, I_GTZZ_DXZ, I_GTXZ_DXZ, I_GTXX_DZZ, I_GTYY_DZZ, I_GTZZ_DZZ, I_GTXZ_DZZ, I_PHI, I_PHI_DX, I_PHI_DZ, I_PHI_DXX, I_PHI_DZZ, I_PHI_DXZ, I_ATXX, I_ATYY, I_ATZZ, I_ATXY, I_ATXZ, I_ATYZ, I_ATXX_DX, I_ATYY_DX, I_ATZZ_DX, I_ATXZ_DX, I_ATXX_DZ, I_ATYY_DZ, I_ATZZ_DZ, I_ATXZ_DZ, I_TRK, I_TRK_DX, I_TRK_DZ, I_BETAX, I_BETAY, I_BETAZ, I_BETAX_DX, I_BETAX_DZ, I_BETAX_DXX, I_BETAX_DXZ, I_BETAX_DZZ, I_BETAZ_DX, I_BETAZ_DZ, I_BETAZ_DXX, I_BETAZ_DXZ, I_BETAZ_DZZ, I_ALPHA, I_ALPHA_DX, I_ALPHA_DZ, I_ALPHA_DXX, I_ALPHA_DZZ, I_ALPHA_DXZ, I_KDOT_XX, I_KDOT_YY, I_KDOT_ZZ, I_KDOT_XY, I_KDOT_XZ, I_KDOT_YZ, NB_INTERP_VARS, }; struct QMSSolverPriv { PSSolveContext *ps_ctx; cGH *gh; int colloc_grid_order[2]; double *eq_coeffs[PSSOLVE_DIFF_ORDER_NB]; double *rhs; double *coeff_scale; // interpolation parameters int coord_system; int interp_operator; int interp_params; CCTK_REAL *interp_coords[3]; int interp_vars_indices[NB_METRIC_VARS]; CCTK_REAL *interp_values[NB_INTERP_VARS]; CCTK_INT interp_value_codes[NB_INTERP_VARS]; #if HAVE_OPENCL // OpenCL / CLBLAS stuff cl_context ocl_ctx; cl_command_queue ocl_queue; #endif uint64_t solve_count; uint64_t solve_time; uint64_t interp_geometry_count; uint64_t interp_geometry_time; uint64_t calc_eq_coeffs_count; uint64_t calc_eq_coeffs_time; }; /* mapping between our indices and thorn names */ static const char *metric_vars_ccz4[] = { [GTXX] = "ML_CCZ4::gt11", [GTYY] = "ML_CCZ4::gt22", [GTZZ] = "ML_CCZ4::gt33", [GTXY] = "ML_CCZ4::gt12", [GTXZ] = "ML_CCZ4::gt13", [GTYZ] = "ML_CCZ4::gt23", [ATXX] = "ML_CCZ4::At11", [ATYY] = "ML_CCZ4::At22", [ATZZ] = "ML_CCZ4::At33", [ATXY] = "ML_CCZ4::At12", [ATXZ] = "ML_CCZ4::At13", [ATYZ] = "ML_CCZ4::At23", [PHI] = "ML_CCZ4::phi", [K] = "ML_CCZ4::trK", [BETAX] = "ML_CCZ4::beta1", [BETAY] = "ML_CCZ4::beta2", [BETAZ] = "ML_CCZ4::beta3", [ALPHA] = "ML_CCZ4::alpha", [KDOT_XX] = "ML_CCZ4::Kdot11", [KDOT_YY] = "ML_CCZ4::Kdot22", [KDOT_ZZ] = "ML_CCZ4::Kdot33", [KDOT_XY] = "ML_CCZ4::Kdot12", [KDOT_XZ] = "ML_CCZ4::Kdot13", [KDOT_YZ] = "ML_CCZ4::Kdot23", }; static const char *metric_vars_bssn[] = { [GTXX] = "ML_BSSN::gt11", [GTYY] = "ML_BSSN::gt22", [GTZZ] = "ML_BSSN::gt33", [GTXY] = "ML_BSSN::gt12", [GTXZ] = "ML_BSSN::gt13", [GTYZ] = "ML_BSSN::gt23", [ATXX] = "ML_BSSN::At11", [ATYY] = "ML_BSSN::At22", [ATZZ] = "ML_BSSN::At33", [ATXY] = "ML_BSSN::At12", [ATXZ] = "ML_BSSN::At13", [ATYZ] = "ML_BSSN::At23", [PHI] = "ML_BSSN::phi", [K] = "ML_BSSN::trK", [BETAX] = "ML_BSSN::beta1", [BETAY] = "ML_BSSN::beta2", [BETAZ] = "ML_BSSN::beta3", [ALPHA] = "ML_BSSN::alpha", //[ALPHA] = "ADMBase::alp", [KDOT_XX] = "ML_BSSN::Kdot11", [KDOT_YY] = "ML_BSSN::Kdot22", [KDOT_ZZ] = "ML_BSSN::Kdot33", [KDOT_XY] = "ML_BSSN::Kdot12", [KDOT_XZ] = "ML_BSSN::Kdot13", [KDOT_YZ] = "ML_BSSN::Kdot23", }; /* mapping between the cactus grid values and interpolated values */ static const CCTK_INT interp_operation_indices[] = { [I_GTXX] = GTXX, [I_GTYY] = GTYY, [I_GTZZ] = GTZZ, [I_GTXY] = GTXY, [I_GTXZ] = GTXZ, [I_GTYZ] = GTYZ, [I_GTXX_DX] = GTXX, [I_GTYY_DX] = GTYY, [I_GTZZ_DX] = GTZZ, [I_GTXZ_DX] = GTXZ, [I_GTXX_DZ] = GTXX, [I_GTYY_DZ] = GTYY, [I_GTZZ_DZ] = GTZZ, [I_GTXZ_DZ] = GTXZ, [I_GTXX_DXX] = GTXX, [I_GTYY_DXX] = GTYY, [I_GTZZ_DXX] = GTZZ, [I_GTXZ_DXX] = GTXZ, [I_GTXX_DXZ] = GTXX, [I_GTYY_DXZ] = GTYY, [I_GTZZ_DXZ] = GTZZ, [I_GTXZ_DXZ] = GTXZ, [I_GTXX_DZZ] = GTXX, [I_GTYY_DZZ] = GTYY, [I_GTZZ_DZZ] = GTZZ, [I_GTXZ_DZZ] = GTXZ, [I_PHI] = PHI, [I_PHI_DX] = PHI, [I_PHI_DZ] = PHI, [I_PHI_DXX] = PHI, [I_PHI_DZZ] = PHI, [I_PHI_DXZ] = PHI, [I_ATXX] = ATXX, [I_ATYY] = ATYY, [I_ATZZ] = ATZZ, [I_ATXY] = ATXY, [I_ATXZ] = ATXZ, [I_ATYZ] = ATYZ, [I_ATXX_DX] = ATXX, [I_ATYY_DX] = ATYY, [I_ATZZ_DX] = ATZZ, [I_ATXZ_DX] = ATXZ, [I_ATXX_DZ] = ATXX, [I_ATYY_DZ] = ATYY, [I_ATZZ_DZ] = ATZZ, [I_ATXZ_DZ] = ATXZ, [I_TRK] = K, [I_TRK_DX] = K, [I_TRK_DZ] = K, [I_BETAX] = BETAX, [I_BETAY] = BETAY, [I_BETAZ] = BETAZ, [I_BETAX_DX] = BETAX, [I_BETAX_DZ] = BETAX, [I_BETAX_DXX] = BETAX, [I_BETAX_DXZ] = BETAX, [I_BETAX_DZZ] = BETAX, [I_BETAZ_DX] = BETAZ, [I_BETAZ_DZ] = BETAZ, [I_BETAZ_DXX] = BETAZ, [I_BETAZ_DXZ] = BETAZ, [I_BETAZ_DZZ] = BETAZ, [I_ALPHA] = ALPHA, [I_ALPHA_DX] = ALPHA, [I_ALPHA_DZ] = ALPHA, [I_ALPHA_DXX] = ALPHA, [I_ALPHA_DZZ] = ALPHA, [I_ALPHA_DXZ] = ALPHA, [I_KDOT_XX] = KDOT_XX, [I_KDOT_YY] = KDOT_YY, [I_KDOT_ZZ] = KDOT_ZZ, [I_KDOT_XY] = KDOT_XY, [I_KDOT_XZ] = KDOT_XZ, [I_KDOT_YZ] = KDOT_YZ, }; /* the operation (plain value or x/y/z-derivative) to apply during interpolation */ static const CCTK_INT interp_operation_codes[] = { [I_GTXX] = 0, [I_GTYY] = 0, [I_GTZZ] = 0, [I_GTXY] = 0, [I_GTXZ] = 0, [I_GTYZ] = 0, [I_GTXX_DX] = 1, [I_GTYY_DX] = 1, [I_GTZZ_DX] = 1, [I_GTXZ_DX] = 1, [I_GTXX_DZ] = 3, [I_GTYY_DZ] = 3, [I_GTZZ_DZ] = 3, [I_GTXZ_DZ] = 3, [I_GTXX_DXX] = 11, [I_GTYY_DXX] = 11, [I_GTZZ_DXX] = 11, [I_GTXZ_DXX] = 11, [I_GTXX_DXZ] = 13, [I_GTYY_DXZ] = 13, [I_GTZZ_DXZ] = 13, [I_GTXZ_DXZ] = 13, [I_GTXX_DZZ] = 33, [I_GTYY_DZZ] = 33, [I_GTZZ_DZZ] = 33, [I_GTXZ_DZZ] = 33, [I_PHI] = 0, [I_PHI_DX] = 1, [I_PHI_DZ] = 3, [I_PHI_DXX] = 11, [I_PHI_DZZ] = 33, [I_PHI_DXZ] = 13, [I_ATXX] = 0, [I_ATYY] = 0, [I_ATZZ] = 0, [I_ATXY] = 0, [I_ATXZ] = 0, [I_ATYZ] = 0, [I_ATXX_DX] = 1, [I_ATYY_DX] = 1, [I_ATZZ_DX] = 1, [I_ATXZ_DX] = 1, [I_ATXX_DZ] = 3, [I_ATYY_DZ] = 3, [I_ATZZ_DZ] = 3, [I_ATXZ_DZ] = 3, [I_TRK] = 0, [I_TRK_DX] = 1, [I_TRK_DZ] = 3, [I_BETAX] = 0, [I_BETAY] = 0, [I_BETAZ] = 0, [I_BETAX_DX] = 1, [I_BETAX_DZ] = 3, [I_BETAX_DXX] = 11, [I_BETAX_DXZ] = 13, [I_BETAX_DZZ] = 33, [I_BETAZ_DX] = 1, [I_BETAZ_DZ] = 3, [I_BETAZ_DXX] = 11, [I_BETAZ_DXZ] = 13, [I_BETAZ_DZZ] = 33, [I_ALPHA] = 0, [I_ALPHA_DX] = 1, [I_ALPHA_DZ] = 3, [I_ALPHA_DXX] = 11, [I_ALPHA_DZZ] = 33, [I_ALPHA_DXZ] = 13, [I_KDOT_XX] = 0, [I_KDOT_YY] = 0, [I_KDOT_ZZ] = 0, [I_KDOT_XY] = 0, [I_KDOT_XZ] = 0, [I_KDOT_YZ] = 0, }; /* interpolate the cactus gridfunctions onto the pseudospectral grid */ static int interp_geometry(QMSSolver *ctx) { QMSSolverPriv *s = ctx->priv; int ret; ret = CCTK_InterpGridArrays(s->gh, 3, s->interp_operator, s->interp_params, s->coord_system, NB_COLLOC_POINTS(ctx), CCTK_VARIABLE_REAL, (const void * const *)s->interp_coords, ARRAY_ELEMS(s->interp_vars_indices), s->interp_vars_indices, ARRAY_ELEMS(s->interp_values), s->interp_value_codes, (void * const *)s->interp_values); if (ret < 0) CCTK_WARN(0, "Error interpolating"); return 0; } /* evaluate the equation coefficients at the collocation points */ static int calc_eq_coeffs(QMSSolver *ctx) { QMSSolverPriv *s = ctx->priv; //#pragma omp parallel for schedule(dynamic, ms->nb_colloc_points_x) for (int i = 0; i < NB_COLLOC_POINTS(ctx); i++) { const double x = s->interp_coords[0][i]; const double z = s->interp_coords[2][i]; const int zaxis = x <= EPS; double K[3][3], Km[3][3], Ku[3][3], dK[3][3][3]; double gtu[3][3], g[3][3], gu[3][3]; double dg[3][3][3], dgu[3][3][3], d2g[3][3][3][3], gudot[3][3]; double G[3][3][3], dG[3][3][3][3], Gdot[3][3][3], X[3]; double Ric[3][3], Ricm[3][3]; double dgdot[3][3][3]; double k2, Kij_Dij_alpha, k_kdot, k3; double Gammadot_term, beta_term; double betal[3], dbetal[3][3], d2betal[3][3][3]; const double gtxx = s->interp_values[I_GTXX][i]; const double gtyy = s->interp_values[I_GTYY][i]; const double gtzz = s->interp_values[I_GTZZ][i]; const double gtxy = s->interp_values[I_GTXY][i]; const double gtxz = s->interp_values[I_GTXZ][i]; const double gtyz = s->interp_values[I_GTYZ][i]; const double gt[3][3] = {{ gtxx, gtxy, gtxz }, { gtxy, gtyy, gtyz }, { gtxz, gtyz, gtzz }}; const double dx_gt11 = s->interp_values[I_GTXX_DX][i]; const double dx_gt22 = s->interp_values[I_GTYY_DX][i]; const double dx_gt33 = s->interp_values[I_GTZZ_DX][i]; const double dx_gt13 = s->interp_values[I_GTXZ_DX][i]; const double dz_gt11 = s->interp_values[I_GTXX_DZ][i]; const double dz_gt22 = s->interp_values[I_GTYY_DZ][i]; const double dz_gt33 = s->interp_values[I_GTZZ_DZ][i]; const double dz_gt13 = s->interp_values[I_GTXZ_DZ][i]; const double dgt[3][3][3] = { { { dx_gt11, 0.0, dx_gt13 }, { 0.0, dx_gt22, 0.0 }, { dx_gt13, 0.0, dx_gt33 }, }, { { 0.0, zaxis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0 }, { zaxis ? dx_gt11 - dx_gt22 : (gtxx - gtyy) / x, 0.0, zaxis ? dx_gt13 : gtxz / x }, { 0.0, zaxis ? dx_gt13 : gtxz / x, 0.0 }, }, { { dz_gt11, 0.0, dz_gt13 }, { 0.0, dz_gt22, 0.0 }, { dz_gt13, 0.0, dz_gt33 }, }, }; const double dxx_gt11 = s->interp_values[I_GTXX_DXX][i]; const double dxx_gt22 = s->interp_values[I_GTYY_DXX][i]; const double dxx_gt33 = s->interp_values[I_GTZZ_DXX][i]; const double dxx_gt13 = s->interp_values[I_GTXZ_DXX][i]; const double dxz_gt11 = s->interp_values[I_GTXX_DXZ][i]; const double dxz_gt22 = s->interp_values[I_GTYY_DXZ][i]; const double dxz_gt33 = s->interp_values[I_GTZZ_DXZ][i]; const double dxz_gt13 = s->interp_values[I_GTXZ_DXZ][i]; const double dzz_gt11 = s->interp_values[I_GTXX_DZZ][i]; const double dzz_gt22 = s->interp_values[I_GTYY_DZZ][i]; const double dzz_gt33 = s->interp_values[I_GTZZ_DZZ][i]; const double dzz_gt13 = s->interp_values[I_GTXZ_DZZ][i]; const double d2gt[3][3][3][3] = { { { { dxx_gt11, 0.0, dxx_gt13 }, { 0.0, dxx_gt22, 0.0 }, { dxx_gt13, 0.0, dxx_gt33 }, }, { { 0.0, zaxis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, { zaxis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, zaxis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, { 0.0, zaxis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, }, { { dxz_gt11, 0.0, dxz_gt13 }, { 0.0, dxz_gt22, 0.0 }, { dxz_gt13, 0.0, dxz_gt33 }, }, }, { { { 0.0, zaxis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0 }, { zaxis ? 0.5 * (dxx_gt11 - dxx_gt22) : (dx_gt11 - dx_gt22) / x - (gtxx - gtyy) / SQR(x), 0.0, zaxis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, { 0.0, zaxis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0 }, }, { { zaxis ? dxx_gt22 : dx_gt11 / x - 2 * (gtxx - gtyy) / SQR(x), 0.0, zaxis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x) }, { 0.0, zaxis ? dxx_gt11 : dx_gt22 / x + 2.0 * (gtxx - gtyy) / SQR(x), 0.0 }, { zaxis ? 0.5 * dxx_gt13 : dx_gt13 / x - gtxz / SQR(x), 0.0, zaxis ? dxx_gt33 : dx_gt33 / x }, }, { { 0.0, zaxis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, { zaxis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, zaxis ? dxz_gt13 : dz_gt13 / x }, { 0.0, zaxis ? dxz_gt13 : dz_gt13 / x, 0.0 }, }, }, { { { dxz_gt11, 0.0, dxz_gt13 }, { 0.0, dxz_gt22, 0.0 }, { dxz_gt13, 0.0, dxz_gt33 }, }, { { 0.0, zaxis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0 }, { zaxis ? dxz_gt11 - dxz_gt22 : (dz_gt11 - dz_gt22) / x, 0.0, zaxis ? dxz_gt13 : dz_gt13 / x }, { 0.0, zaxis ? dxz_gt13 : dz_gt13 / x, 0.0 }, }, { { dzz_gt11, 0.0, dzz_gt13 }, { 0.0, dzz_gt22, 0.0 }, { dzz_gt13, 0.0, dzz_gt33 }, }, }, }; const double Atxx = s->interp_values[I_ATXX][i]; const double Atyy = s->interp_values[I_ATYY][i]; const double Atzz = s->interp_values[I_ATZZ][i]; const double Atxy = s->interp_values[I_ATXY][i]; const double Atxz = s->interp_values[I_ATXZ][i]; const double Atyz = s->interp_values[I_ATYZ][i]; const double At[3][3] = {{ Atxx, Atxy, Atxz }, { Atxy, Atyy, Atyz }, { Atxz, Atyz, Atzz }}; const double dx_At11 = s->interp_values[I_ATXX_DX][i]; const double dx_At22 = s->interp_values[I_ATYY_DX][i]; const double dx_At33 = s->interp_values[I_ATZZ_DX][i]; const double dx_At13 = s->interp_values[I_ATXZ_DX][i]; const double dz_At11 = s->interp_values[I_ATXX_DZ][i]; const double dz_At22 = s->interp_values[I_ATYY_DZ][i]; const double dz_At33 = s->interp_values[I_ATZZ_DZ][i]; const double dz_At13 = s->interp_values[I_ATXZ_DZ][i]; const double dAt[3][3][3] = { { { dx_At11, 0.0, dx_At13 }, { 0.0, dx_At22, 0.0 }, { dx_At13, 0.0, dx_At33 }, }, { { 0.0, zaxis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0 }, { zaxis ? dx_At11 - dx_At22 : (Atxx - Atyy) / x, 0.0, zaxis ? dx_At13 : Atxz / x }, { 0.0, zaxis ? dx_At13 : Atxz / x, 0.0 }, }, { { dz_At11, 0.0, dz_At13 }, { 0.0, dz_At22, 0.0 }, { dz_At13, 0.0, dz_At33 }, }, }; const double phi = s->interp_values[I_PHI][i]; const double dx_phi = s->interp_values[I_PHI_DX][i]; const double dz_phi = s->interp_values[I_PHI_DZ][i]; const double dphi[3] = { dx_phi, 0.0, dz_phi }; const double phi_dxx = s->interp_values[I_PHI_DXX][i]; const double phi_dzz = s->interp_values[I_PHI_DZZ][i]; const double phi_dxz = s->interp_values[I_PHI_DXZ][i]; const double d2phi[3][3] = { { phi_dxx, 0.0, phi_dxz }, { 0.0, zaxis ? phi_dxx : dx_phi / x, 0.0 }, { phi_dxz, 0.0, phi_dzz }, }; const double trK = s->interp_values[I_TRK][i]; const double dx_trK = s->interp_values[I_TRK_DX][i]; const double dz_trK = s->interp_values[I_TRK_DZ][i]; const double dtrK[3] = { dx_trK, 0.0, dz_trK }; const double kdot_xx = s->interp_values[I_KDOT_XX][i]; const double kdot_yy = s->interp_values[I_KDOT_YY][i]; const double kdot_zz = s->interp_values[I_KDOT_ZZ][i]; const double kdot_xy = s->interp_values[I_KDOT_XY][i]; const double kdot_xz = s->interp_values[I_KDOT_XZ][i]; const double kdot_yz = s->interp_values[I_KDOT_YZ][i]; const double kdot[3][3] = {{ kdot_xx, kdot_xy, kdot_xz }, { kdot_xy, kdot_yy, kdot_yz }, { kdot_xz, kdot_yz, kdot_zz }}; const double alpha = s->interp_values[I_ALPHA][i]; const double dx_alpha = s->interp_values[I_ALPHA_DX][i]; const double dz_alpha = s->interp_values[I_ALPHA_DZ][i]; const double dalpha[3] = { dx_alpha, 0.0, dz_alpha }; const double dxx_alpha = s->interp_values[I_ALPHA_DXX][i]; const double dzz_alpha = s->interp_values[I_ALPHA_DZZ][i]; const double dxz_alpha = s->interp_values[I_ALPHA_DXZ][i]; const double d2alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha }, { 0, zaxis ? dxx_alpha : dx_alpha / x, 0 }, { dxz_alpha, 0, dzz_alpha }}; const double betax = s->interp_values[I_BETAX][i]; const double betaz = s->interp_values[I_BETAZ][i]; const double beta[3] = { betax, 0.0, betaz }; const double dx_betax = s->interp_values[I_BETAX_DX][i]; const double dz_betax = s->interp_values[I_BETAX_DZ][i]; const double dx_betaz = s->interp_values[I_BETAZ_DX][i]; const double dz_betaz = s->interp_values[I_BETAZ_DZ][i]; const double dbeta[3][3] = {{ dx_betax, 0.0, dx_betaz }, { 0.0, zaxis ? dx_betax : betax / x, 0.0 }, { dz_betax, 0.0, dz_betaz }}; const double dxx_betax = s->interp_values[I_BETAX_DXX][i]; const double dxz_betax = s->interp_values[I_BETAX_DXZ][i]; const double dzz_betax = s->interp_values[I_BETAX_DZZ][i]; const double dxx_betaz = s->interp_values[I_BETAZ_DXX][i]; const double dxz_betaz = s->interp_values[I_BETAZ_DXZ][i]; const double dzz_betaz = s->interp_values[I_BETAZ_DZZ][i]; const double d2beta[3][3][3] = { { { dxx_betax, 0.0, dxx_betaz }, { 0.0, zaxis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, { dxz_betax, 0.0, dxz_betaz }, }, { { 0.0, zaxis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0 }, { zaxis ? 0.5 * dxx_betax : dx_betax / x - betax / SQR(x), 0.0, zaxis ? dxx_betaz : dx_betaz / x }, { 0.0, zaxis ? dxz_betax : dz_betax / x, 0.0 }, }, { { dxz_betax, 0.0, dxz_betaz }, { 0.0, zaxis ? dxz_betax : dz_betax / x, 0.0 }, { dzz_betax, 0.0, dzz_betaz }, }, }; const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz); // \tilde{γ}^{ij} gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det; gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det; gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det; gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det; gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det; gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det; gtu[1][0] = gtu[0][1]; gtu[2][0] = gtu[0][2]; gtu[2][1] = gtu[1][2]; // γ_{jk}/^{jk} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { gu[j][k] = SQR(phi) * gtu[j][k]; g[j][k] = gt[j][k] / SQR(phi); } // β_j for (int j = 0; j < 3; j++) { double val = 0.0; for (int k = 0; k < 3; k++) val += g[j][k] * beta[k]; betal[j] = val; } // ∂_j γ_{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { dg[j][k][l] = -2.0 * dphi[j] * gt[k][l] / (phi * SQR(phi)) + dgt[j][k][l] / SQR(phi); dK[j][k][l] = -2.0 * dphi[j] * At[k][l] / (phi * SQR(phi)) + dAt[j][k][l] / SQR(phi) + (1.0 / 3.0) * (dtrK[j] * g[k][l] + trK * dg[j][k][l]); } // ∂_{jk} g_{lm} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) for (int m = 0; m < 3; m++) { d2g[j][k][l][m] = 6.0 * gt [l][m] * dphi[j] * dphi[k] / SQR(SQR(phi)) - 2.0 * gt [l][m] * d2phi[j][k] / (phi * SQR(phi)) - 2.0 * dgt [j][l][m] * dphi[k] / (phi * SQR(phi)) - 2.0 * dgt [k][l][m] * dphi[j] / (phi * SQR(phi)) + d2gt[j][k][l][m] / SQR(phi); } // ∂_j γ^{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { double val = 0.0; for (int m = 0; m < 3; m++) for (int n = 0; n < 3; n++) val += -gu[k][m] * gu[l][n] * dg[j][m][n]; dgu[j][k][l] = val; } // Γ^j_{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { double val = 0.0; for (int m = 0; m < 3; m++) val += 0.5 * gu[j][m] * (dg[k][l][m] + dg[l][k][m] - dg[m][k][l]); G[j][k][l] = val; } // ∂_j Γ^k_{lm} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) for (int m = 0; m < 3; m++) { double val = 0.0; for (int n = 0; n < 3; n++) { val += dgu[j][k][n] * (dg [l][m][n] + dg [m][l][n] - dg [n][l][m]) + gu [k][n] * (d2g[j][l][m][n] + d2g[j][m][l][n] - d2g[j][n][l][m]); } dG[j][k][l][m] = 0.5 * val; } for (int j = 0; j < 3; j++) { double val = 0.0; for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) val += gu[k][l] * G[j][k][l]; X[j] = val; } // ∂_j β_k for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += g[k][l] * dbeta[j][l] + dg[j][k][l] * beta[l]; dbetal[j][k] = val; } // ∂_{jk} β_l for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { double val = 0.0; for (int m = 0; m < 3; m++) val += d2g[j][k][l][m] * beta[m] + dg[k][l][m] * dbeta[j][m] + dg[j][l][m] * dbeta[k][m] + g[l][m] * d2beta[j][k][m]; d2betal[j][k][l] = val; } // K_{ij} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) K[j][k] = At[j][k] / SQR(phi) + (1.0 / 3.0) * g[j][k] * trK; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += gu[j][l] * K[l][k]; Km[j][k] = val; } // K^{ij} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += gu[j][l] * Km[k][l]; Ku[j][k] = val; } // ∂_j \dot{γ_kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { dgdot[j][k][l] = -2.0 * (dalpha[j] * K[k][l] + alpha * dK[j][k][l]) + d2betal[j][k][l] + d2betal[j][l][k]; for (int m = 0; m < 3; m++) dgdot[j][k][l] += -2.0 * (dG[j][m][k][l] * betal[m] + G[m][k][l] * dbetal[j][m]); } // \dot{γ^{jk} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { gudot[j][k] = 2.0 * alpha * Ku[j][k]; for (int l = 0; l < 3; l++) { gudot[j][k] += -gu[j][l] * dbeta[l][k] - gu[k][l] * dbeta[l][j]; for (int m = 0; m < 3; m++) gudot[j][k] += -gu[j][l] * G[k][l][m] * beta[m] - gu[k][l] * G[j][l][m] * beta[m]; } } // \dot{Γ}^j_{kl} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { double val = 0.0; for (int m = 0; m < 3; m++) val += gudot[j][m] * (dg [k][l][m] + dg [l][k][m] - dg [m][k][l]) + gu [j][m] * (dgdot[k][l][m] + dgdot[l][k][m] - dgdot[m][k][l]); Gdot[j][k][l] = 0.5 * val; } Gammadot_term = 0.0; for (int j = 0; j < 3; j++) { double val = 0.0; for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { val += gu[k][l] * Gdot[j][k][l]; } Gammadot_term += val * dalpha[j]; } Kij_Dij_alpha = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += G[l][j][k] * dalpha[l]; Kij_Dij_alpha += Ku[j][k] * (d2alpha[j][k] - val); } k_kdot = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) k_kdot += kdot[j][k] * Ku[j][k]; k3 = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += Km[k][l] * Km[l][j]; k3 += val * Km[j][k]; } // K_{ij} K^{ij} k2 = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) k2 += Km[j][k] * Km[k][j]; // Ric_{jk} for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int m = 0; m < 3; m++) val += dG[m][m][j][k] - dG[k][m][j][m]; for (int m = 0; m < 3; m++) for (int l = 0; l < 3; l++) val += G[l][l][m] * G[m][j][k] - G[l][k][m] * G[m][j][l]; Ric[j][k] = val; } // Ric^j_k for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) { double val = 0.0; for (int l = 0; l < 3; l++) val += gu[j][l] * Ric[l][k]; Ricm[j][k] = val; } beta_term = 0.0; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) for (int l = 0; l < 3; l++) { double D2beta = 0.0; D2beta += d2beta[j][k][l]; for (int m = 0; m < 3; m++) { D2beta += dG[j][l][k][m] * beta[m] + G[l][k][m] * dbeta[j][m] + G[l][j][m] * dbeta[k][m] - G[m][j][k] * dbeta[m][l]; for (int n = 0; n < 3; n++) D2beta += G[l][j][m] * G[m][k][n] * beta[n] - G[m][j][k] * G[l][m][n] * beta[n]; } beta_term += -gu[j][k] * D2beta * dalpha[l]; } for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) beta_term += -beta[k] * Ricm[j][k] * dalpha[j]; s->eq_coeffs[PSSOLVE_DIFF_ORDER_20][i] = gu[0][0] + ((x <= EPS) ? gu[1][1] : 0.0); s->eq_coeffs[PSSOLVE_DIFF_ORDER_02][i] = gu[2][2]; s->eq_coeffs[PSSOLVE_DIFF_ORDER_11][i] = 2.0 * gu[0][2]; s->eq_coeffs[PSSOLVE_DIFF_ORDER_10][i] = -X[0] + ((x > EPS) ? gu[1][1] / x : 0.0); s->eq_coeffs[PSSOLVE_DIFF_ORDER_01][i] = -X[2]; s->eq_coeffs[PSSOLVE_DIFF_ORDER_00][i] = -k2; s->rhs[i] = -2 * alpha * Kij_Dij_alpha + Gammadot_term + 2 * alpha * (k_kdot + 2 * alpha * k3) + beta_term; } return 0; } int qms_solver_solve(QMSSolver *ctx) { QMSSolverPriv *s = ctx->priv; int ret; int64_t start, totaltime_start; totaltime_start = gettime(); /* interpolate the metric values and construct the quantities we'll need */ CCTK_TimerStart("QuasiMaximalSlicing_interp_geometry"); start = gettime(); ret = interp_geometry(ctx); s->interp_geometry_time += gettime() - start; s->interp_geometry_count++; CCTK_TimerStop("QuasiMaximalSlicing_interp_geometry"); if (ret < 0) return ret; CCTK_TimerStart("QuasiMaximalSlicing_calc_eq_coeffs"); start = gettime(); ret = calc_eq_coeffs(ctx); s->calc_eq_coeffs_time += gettime() - start; s->calc_eq_coeffs_count++; CCTK_TimerStop("QuasiMaximalSlicing_calc_eq_coeffs"); if (ret < 0) return ret; ret = qms_pssolve_solve(s->ps_ctx, (const double * const *)s->eq_coeffs, s->rhs, ctx->coeffs); if (ret < 0) return ret; for (int i = 0; i < NB_COEFFS(ctx); i++) ctx->coeffs[i] *= s->coeff_scale[i]; s->solve_count++; s->solve_time += gettime() - totaltime_start; return 0; } void qms_solver_print_stats(QMSSolver *ctx) { QMSSolverPriv *s = ctx->priv; fprintf(stderr, "%g%% interpolate geometry: %lu, " "total time %g s, avg time per call %g ms\n", (double)s->interp_geometry_time * 100 / s->solve_time, s->interp_geometry_count, (double)s->interp_geometry_time / 1e6, (double)s->interp_geometry_time / s->interp_geometry_count / 1e3); fprintf(stderr, "%g%% calc equation coefficients: %lu, " "total time %g s, avg time per call %g ms\n", (double)s->calc_eq_coeffs_time * 100 / s->solve_time, s->calc_eq_coeffs_count, (double)s->calc_eq_coeffs_time / 1e6, (double)s->calc_eq_coeffs_time / s->calc_eq_coeffs_count / 1e3); fprintf(stderr, "%g%% pseudospectral matrix construction: %lu, " "total time %g s, avg time per call %g ms\n", (double)s->ps_ctx->construct_matrix_time * 100 / s->solve_time, s->ps_ctx->construct_matrix_count, (double)s->ps_ctx->construct_matrix_time / 1e6, (double)s->ps_ctx->construct_matrix_time / s->ps_ctx->construct_matrix_count / 1e3); fprintf(stderr, "%g%% BiCGSTAB %lu solves, " "%lu iterations, total time %g s, " "avg iterations per solve %g, avg time per solve %g ms, " "avg time per iteration %g ms\n", (double)s->ps_ctx->cg_time_total * 100 / s->solve_time, s->ps_ctx->cg_solve_count, s->ps_ctx->cg_iter_count, (double)s->ps_ctx->cg_time_total / 1e6, (double)s->ps_ctx->cg_iter_count / s->ps_ctx->cg_solve_count, (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_solve_count / 1e3, (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_iter_count / 1e3); fprintf(stderr, "%g%% LU %lu solves, total time %g s, avg time per solve %g ms\n", (double)s->ps_ctx->lu_solves_time * 100 / s->solve_time, s->ps_ctx->lu_solves_count, (double)s->ps_ctx->lu_solves_time / 1e6, (double)s->ps_ctx->lu_solves_time / s->ps_ctx->lu_solves_count / 1e3); } static void init_opencl(QMSSolver *ctx) #if HAVE_OPENCL { QMSSolverPriv *s = ctx->priv; int err, count; cl_platform_id platform; cl_context_properties props[3]; cl_device_id ocl_device; err = clGetPlatformIDs(1, &platform, &count); if (err != CL_SUCCESS || count < 1) { fprintf(stderr, "Could not get an OpenCL platform ID\n"); return; } err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ocl_device, &count); if (err != CL_SUCCESS || count < 1) { fprintf(stderr, "Could not get an OpenCL device ID\n"); return; } props[0] = CL_CONTEXT_PLATFORM; props[1] = (cl_context_properties)platform; props[2] = 0; s->ocl_ctx = clCreateContext(props, 1, &ocl_device, NULL, NULL, &err); if (err != CL_SUCCESS || !s->ocl_ctx) { fprintf(stderr, "Could not create an OpenCL context\n"); return; } s->ocl_queue = clCreateCommandQueue(s->ocl_ctx, ocl_device, 0, &err); if (err != CL_SUCCESS || !s->ocl_queue) { fprintf(stderr, "Could not create an OpenCL command queue: %d\n", err); goto fail; } err = clblasSetup(); if (err != CL_SUCCESS) { fprintf(stderr, "Error setting up clBLAS\n"); goto fail; } return; fail: if (s->ocl_queue) clReleaseCommandQueue(s->ocl_queue); s->ocl_queue = 0; if (s->ocl_ctx) clReleaseContext(s->ocl_ctx); s->ocl_ctx = 0; } #else { } #endif int qms_solver_init(QMSSolver **pctx, cGH *cctkGH, int basis_order_r, int basis_order_z, double outer_bound, double filter_power, double input_filter_power, int ccz4) { QMSSolver *ctx; QMSSolverPriv *s; int ret; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return -ENOMEM; ctx->priv = calloc(1, sizeof(*ctx->priv)); if (!ctx->priv) goto fail; s = ctx->priv; s->gh = cctkGH; ctx->basis[0] = &qms_sb_even_basis; #if QMS_POLAR ctx->basis[1] = &qms_cos_even_basis; #else ctx->basis[1] = &qms_sb_even_basis; #endif ctx->nb_coeffs[0] = basis_order_r; ctx->nb_coeffs[1] = basis_order_z; ctx->nb_colloc_points[0] = basis_order_r; ctx->nb_colloc_points[1] = basis_order_z; if (NB_COLLOC_POINTS(ctx) != NB_COEFFS(ctx)) CCTK_WARN(0, "Non-square collocation matrix"); s->colloc_grid_order[0] = ctx->nb_colloc_points[0]; s->colloc_grid_order[1] = ctx->nb_colloc_points[1]; ret = posix_memalign((void**)&ctx->coeffs, 32, sizeof(*ctx->coeffs) * NB_COEFFS(ctx)); ret |= posix_memalign((void**)&s->rhs, 32, sizeof(*s->rhs) * NB_COLLOC_POINTS(ctx)); if (ret) goto fail; //FIXME scale_factor = 1.0; scale_factor = (outer_bound / ctx->basis[0]->colloc_point(s->colloc_grid_order[0], ctx->nb_colloc_points[0] - 1)); fprintf(stderr, "scale factor %16.16g\n", scale_factor); init_opencl(ctx); ret = qms_pssolve_context_alloc(&s->ps_ctx); if (ret < 0) CCTK_WARN(0, "Error allocating the pseudospectral solver"); s->ps_ctx->basis[0] = ctx->basis[0]; s->ps_ctx->basis[1] = ctx->basis[1]; s->ps_ctx->solve_order[0] = basis_order_r; s->ps_ctx->solve_order[1] = basis_order_z; #if HAVE_OPENCL s->ps_ctx->ocl_ctx = s->ocl_ctx; s->ps_ctx->ocl_queue = s->ocl_queue; #endif ret = qms_pssolve_context_init(s->ps_ctx); if (ret < 0) CCTK_WARN(0, "Error initializing the pseudospectral solver"); for (int i = 0; i < MAX(s->ps_ctx->solve_order[0], s->ps_ctx->solve_order[1]); i++) { fprintf(stderr, "%d ", i); if (i < s->ps_ctx->solve_order[0]) fprintf(stderr, "%g\t", s->ps_ctx->colloc_grid[0][i]); else fprintf(stderr, "\t\t"); if (i < s->ps_ctx->solve_order[1]) fprintf(stderr, "%g\t", s->ps_ctx->colloc_grid[1][i]); fprintf(stderr, "\n"); } for (int i = 0; i < ARRAY_ELEMS(s->eq_coeffs); i++) { ret = posix_memalign((void**)&s->eq_coeffs[i], 32, NB_COLLOC_POINTS(ctx) * sizeof(*s->eq_coeffs[i])); if (ret) goto fail; } for (int i = 0; i < ARRAY_ELEMS(s->interp_coords); i++) { ret |= posix_memalign((void**)&s->interp_coords[i], 32, NB_COLLOC_POINTS(ctx) * sizeof(*s->interp_coords[i])); } if (ret) goto fail; for (int i = 0; i < ctx->nb_colloc_points[1]; i++) { for (int j = 0; j < ctx->nb_colloc_points[0]; j++) { #if QMS_POLAR double phi = s->ps_ctx->colloc_grid[1][i]; double r = s->ps_ctx->colloc_grid[0][j]; double x = r * cos(phi); double z = r * sin(phi); #else double x = s->ps_ctx->colloc_grid[0][j]; double z = s->ps_ctx->colloc_grid[1][i]; #endif s->interp_coords[0][i * ctx->nb_colloc_points[0] + j] = x; s->interp_coords[1][i * ctx->nb_colloc_points[0] + j] = 0; s->interp_coords[2][i * ctx->nb_colloc_points[0] + j] = z; } } ret = posix_memalign((void**)&s->coeff_scale, 32, NB_COEFFS(ctx) * sizeof(*s->coeff_scale)); if (ret) goto fail; for (int j = 0; j < ctx->nb_coeffs[1]; j++) for (int i = 0; i < ctx->nb_coeffs[0]; i++) { s->coeff_scale[j * ctx->nb_coeffs[0] + i] = exp(-36.0 * pow((double)i / ctx->nb_coeffs[0], filter_power)) * exp(-36.0 * pow((double)j / ctx->nb_coeffs[1], filter_power)); } for (int i = 0; i < ARRAY_ELEMS(s->interp_values); i++) { ret = posix_memalign((void**)&s->interp_values[i], 32, NB_COLLOC_POINTS(ctx) * sizeof(*s->interp_values[i])); if (ret) goto fail; s->interp_value_codes[i] = CCTK_VARIABLE_REAL; } if (ccz4) { for (int i = 0; i < ARRAY_ELEMS(metric_vars_ccz4); i++) { s->interp_vars_indices[i] = CCTK_VarIndex(metric_vars_ccz4[i]); if (s->interp_vars_indices[i] < 0) CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING, "Error getting the index of variable: %s\n", metric_vars_ccz4[i]); } } else { for (int i = 0; i < ARRAY_ELEMS(metric_vars_bssn); i++) { s->interp_vars_indices[i] = CCTK_VarIndex(metric_vars_bssn[i]); if (s->interp_vars_indices[i] < 0) CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING, "Error getting the index of variable: %s\n", metric_vars_bssn[i]); } } s->coord_system = CCTK_CoordSystemHandle("cart3d"); if (s->coord_system < 0) CCTK_WARN(0, "Error getting the coordinate system"); s->interp_operator = CCTK_InterpHandle("Lagrange polynomial interpolation (tensor product)"); if (s->interp_operator < 0) CCTK_WARN(0, "Error getting the interpolation operator"); s->interp_params = Util_TableCreateFromString("order=4 want_global_mode=1"); if (s->interp_params < 0) CCTK_WARN(0, "Error creating interpolation parameters table"); ret = Util_TableSetIntArray(s->interp_params, NB_INTERP_VARS, interp_operation_codes, "operation_codes"); if (ret < 0) CCTK_WARN(0, "Error setting operation codes"); ret = Util_TableSetIntArray(s->interp_params, NB_INTERP_VARS, interp_operation_indices, "operand_indices"); if (ret < 0) CCTK_WARN(0, "Error setting operand indices"); CCTK_TimerCreate("QuasiMaximalSlicing_Solve"); CCTK_TimerCreate("QuasiMaximalSlicing_Expand"); CCTK_TimerCreate("QuasiMaximalSlicing_interp_geometry"); CCTK_TimerCreate("QuasiMaximalSlicing_calc_eq_coeffs"); CCTK_TimerCreate("QuasiMaximalSlicing_construct_matrix"); CCTK_TimerCreate("QuasiMaximalSlicing_solve_LU"); CCTK_TimerCreate("QuasiMaximalSlicing_solve_BiCGSTAB"); *pctx = ctx; return 0; fail: qms_solver_free(&ctx); return -ENOMEM; } void qms_solver_free(QMSSolver **pctx) { QMSSolver *ctx = *pctx; if (!ctx) return; if (ctx->priv) { for (int i = 0; i < ARRAY_ELEMS(ctx->priv->interp_coords); i++) free(ctx->priv->interp_coords[i]); for (int i = 0; i < ARRAY_ELEMS(ctx->priv->interp_values); i++) free(ctx->priv->interp_values[i]); for (int i = 0; i < ARRAY_ELEMS(ctx->priv->eq_coeffs); i++) free(ctx->priv->eq_coeffs[i]); free(ctx->priv->rhs); free(ctx->priv->coeff_scale); qms_pssolve_context_free(&ctx->priv->ps_ctx); #if HAVE_OPENCL if (ctx->priv->ocl_queue) clReleaseCommandQueue(ctx->priv->ocl_queue); if (ctx->priv->ocl_ctx) clReleaseContext(ctx->priv->ocl_ctx); #endif } free(ctx->priv); free(ctx->coeffs); free(ctx); *pctx = NULL; }