summaryrefslogtreecommitdiff
path: root/src/qms_solve.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/qms_solve.c')
-rw-r--r--src/qms_solve.c880
1 files changed, 880 insertions, 0 deletions
diff --git a/src/qms_solve.c b/src/qms_solve.c
new file mode 100644
index 0000000..7136f38
--- /dev/null
+++ b/src/qms_solve.c
@@ -0,0 +1,880 @@
+/*
+ * Quasimaximal slicing -- actual solver code
+ * Copyright (C) 2016 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "common.h"
+
+#include <errno.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if HAVE_OPENCL
+#include <cl.h>
+#include <clBLAS.h>
+#endif
+
+#include "cctk.h"
+#include "cctk_Timers.h"
+#include "util_Table.h"
+
+#include "basis.h"
+#include "pssolve.h"
+#include "qms_solve.h"
+
+#define NB_COEFFS(qms) (qms->nb_coeffs[0] * qms->nb_coeffs[1])
+#define NB_COLLOC_POINTS(qms) (qms->nb_colloc_points[0] * qms->nb_colloc_points[1])
+
+/* indices (in our code, not cactus structs) of the grid functions which we'll need to
+ * interpolate on the pseudospectral grid */
+enum MetricVars {
+ GTXX = 0,
+ GTYY,
+ GTZZ,
+ GTXY,
+ GTXZ,
+ GTYZ,
+ PHI,
+ ATXX,
+ ATYY,
+ ATZZ,
+ ATXY,
+ ATXZ,
+ ATYZ,
+ K,
+ XTX,
+ XTY,
+ XTZ,
+ BETAX,
+ BETAY,
+ BETAZ,
+ ALPHA,
+ KDOT_XX,
+ KDOT_YY,
+ KDOT_ZZ,
+ KDOT_XY,
+ KDOT_XZ,
+ KDOT_YZ,
+ XTDOT_X,
+ XTDOT_Y,
+ XTDOT_Z,
+ PHIDOT,
+ NB_METRIC_VARS,
+};
+
+/* indices of the interpolated values of the above grid functions and their derivatives */
+enum InterpMetricVars {
+ I_GTXX = 0,
+ I_GTYY,
+ I_GTZZ,
+ I_GTXY,
+ I_GTXZ,
+ I_GTYZ,
+ I_PHI,
+ I_PHI_DX,
+ I_PHI_DY,
+ I_PHI_DZ,
+ I_ATXX,
+ I_ATYY,
+ I_ATZZ,
+ I_ATXY,
+ I_ATXZ,
+ I_ATYZ,
+ I_K,
+ I_XTX,
+ I_XTY,
+ I_XTZ,
+ I_BETAX,
+ I_BETAY,
+ I_BETAZ,
+ I_ALPHA,
+ I_ALPHA_DX,
+ I_ALPHA_DY,
+ I_ALPHA_DZ,
+ I_ALPHA_DXX,
+ I_ALPHA_DYY,
+ I_ALPHA_DZZ,
+ I_ALPHA_DXY,
+ I_ALPHA_DXZ,
+ I_ALPHA_DYZ,
+ I_KDOT_XX,
+ I_KDOT_YY,
+ I_KDOT_ZZ,
+ I_KDOT_XY,
+ I_KDOT_XZ,
+ I_KDOT_YZ,
+ I_XTDOT_X,
+ I_XTDOT_Y,
+ I_XTDOT_Z,
+ I_PHIDOT,
+ I_PHIDOT_DX,
+ I_PHIDOT_DY,
+ I_PHIDOT_DZ,
+ NB_INTERP_VARS,
+};
+
+struct QMSSolverPriv {
+ PSSolveContext *ps_ctx;
+ cGH *gh;
+
+ int colloc_grid_order[2];
+
+ double *eq_coeffs[PSSOLVE_DIFF_ORDER_NB];
+ double *rhs;
+
+ double *coeff_scale;
+
+ // interpolation parameters
+ int coord_system;
+ int interp_operator;
+ int interp_params;
+
+ CCTK_REAL *interp_coords[3];
+
+ int interp_vars_indices[NB_METRIC_VARS];
+ CCTK_REAL *interp_values[NB_INTERP_VARS];
+ CCTK_INT interp_value_codes[NB_INTERP_VARS];
+
+#if HAVE_OPENCL
+ // OpenCL / CLBLAS stuff
+ cl_context ocl_ctx;
+ cl_command_queue ocl_queue;
+#endif
+
+ uint64_t solve_count;
+ uint64_t solve_time;
+
+ uint64_t interp_geometry_count;
+ uint64_t interp_geometry_time;
+
+ uint64_t calc_eq_coeffs_count;
+ uint64_t calc_eq_coeffs_time;
+};
+
+/* mapping between our indices and thorn names */
+static const char *metric_vars[] = {
+#if QMS_CCZ4
+ [GTXX] = "ML_CCZ4::gt11",
+ [GTYY] = "ML_CCZ4::gt22",
+ [GTZZ] = "ML_CCZ4::gt33",
+ [GTXY] = "ML_CCZ4::gt12",
+ [GTXZ] = "ML_CCZ4::gt13",
+ [GTYZ] = "ML_CCZ4::gt23",
+ [ATXX] = "ML_CCZ4::At11",
+ [ATYY] = "ML_CCZ4::At22",
+ [ATZZ] = "ML_CCZ4::At33",
+ [ATXY] = "ML_CCZ4::At12",
+ [ATXZ] = "ML_CCZ4::At13",
+ [ATYZ] = "ML_CCZ4::At23",
+ [PHI] = "ML_CCZ4::phi",
+ [K] = "ML_CCZ4::trK",
+ [XTX] = "ML_CCZ4::Xt1",
+ [XTY] = "ML_CCZ4::Xt2",
+ [XTZ] = "ML_CCZ4::Xt3",
+ [BETAX] = "ML_CCZ4::beta1",
+ [BETAY] = "ML_CCZ4::beta2",
+ [BETAZ] = "ML_CCZ4::beta3",
+ [ALPHA] = "ML_CCZ4::alpha",
+ [KDOT_XX] = "ML_CCZ4::Kdot11",
+ [KDOT_YY] = "ML_CCZ4::Kdot22",
+ [KDOT_ZZ] = "ML_CCZ4::Kdot33",
+ [KDOT_XY] = "ML_CCZ4::Kdot12",
+ [KDOT_XZ] = "ML_CCZ4::Kdot13",
+ [KDOT_YZ] = "ML_CCZ4::Kdot23",
+ [XTDOT_X] = "ML_CCZ4::Xtdot1",
+ [XTDOT_Y] = "ML_CCZ4::Xtdot2",
+ [XTDOT_Z] = "ML_CCZ4::Xtdot3",
+ [PHIDOT] = "ML_CCZ4::phidot",
+#else
+ [GTXX] = "ML_BSSN::gt11",
+ [GTYY] = "ML_BSSN::gt22",
+ [GTZZ] = "ML_BSSN::gt33",
+ [GTXY] = "ML_BSSN::gt12",
+ [GTXZ] = "ML_BSSN::gt13",
+ [GTYZ] = "ML_BSSN::gt23",
+ [ATXX] = "ML_BSSN::At11",
+ [ATYY] = "ML_BSSN::At22",
+ [ATZZ] = "ML_BSSN::At33",
+ [ATXY] = "ML_BSSN::At12",
+ [ATXZ] = "ML_BSSN::At13",
+ [ATYZ] = "ML_BSSN::At23",
+ [PHI] = "ML_BSSN::phi",
+ [K] = "ML_BSSN::trK",
+ [XTX] = "ML_BSSN::Xt1",
+ [XTY] = "ML_BSSN::Xt2",
+ [XTZ] = "ML_BSSN::Xt3",
+ [BETAX] = "ML_BSSN::beta1",
+ [BETAY] = "ML_BSSN::beta2",
+ [BETAZ] = "ML_BSSN::beta3",
+ [ALPHA] = "ML_BSSN::alpha",
+ //[ALPHA] = "ADMBase::alp",
+ [KDOT_XX] = "ML_BSSN::Kdot11",
+ [KDOT_YY] = "ML_BSSN::Kdot22",
+ [KDOT_ZZ] = "ML_BSSN::Kdot33",
+ [KDOT_XY] = "ML_BSSN::Kdot12",
+ [KDOT_XZ] = "ML_BSSN::Kdot13",
+ [KDOT_YZ] = "ML_BSSN::Kdot23",
+ [XTDOT_X] = "ML_BSSN::Xtdot1",
+ [XTDOT_Y] = "ML_BSSN::Xtdot2",
+ [XTDOT_Z] = "ML_BSSN::Xtdot3",
+ [PHIDOT] = "ML_BSSN::phidot",
+#endif
+};
+
+/* mapping between the cactus grid values and interpolated values */
+static const CCTK_INT interp_operation_indices[] = {
+ [I_GTXX] = GTXX,
+ [I_GTYY] = GTYY,
+ [I_GTZZ] = GTZZ,
+ [I_GTXY] = GTXY,
+ [I_GTXZ] = GTXZ,
+ [I_GTYZ] = GTYZ,
+ [I_PHI] = PHI,
+ [I_PHI_DX] = PHI,
+ [I_PHI_DY] = PHI,
+ [I_PHI_DZ] = PHI,
+ [I_ATXX] = ATXX,
+ [I_ATYY] = ATYY,
+ [I_ATZZ] = ATZZ,
+ [I_ATXY] = ATXY,
+ [I_ATXZ] = ATXZ,
+ [I_ATYZ] = ATYZ,
+ [I_K] = K,
+ [I_XTX] = XTX,
+ [I_XTY] = XTY,
+ [I_XTZ] = XTZ,
+ [I_BETAX] = BETAX,
+ [I_BETAY] = BETAY,
+ [I_BETAZ] = BETAZ,
+ [I_ALPHA] = ALPHA,
+ [I_ALPHA_DX] = ALPHA,
+ [I_ALPHA_DY] = ALPHA,
+ [I_ALPHA_DZ] = ALPHA,
+ [I_ALPHA_DXX] = ALPHA,
+ [I_ALPHA_DYY] = ALPHA,
+ [I_ALPHA_DZZ] = ALPHA,
+ [I_ALPHA_DXY] = ALPHA,
+ [I_ALPHA_DXZ] = ALPHA,
+ [I_ALPHA_DYZ] = ALPHA,
+ [I_KDOT_XX] = KDOT_XX,
+ [I_KDOT_YY] = KDOT_YY,
+ [I_KDOT_ZZ] = KDOT_ZZ,
+ [I_KDOT_XY] = KDOT_XY,
+ [I_KDOT_XZ] = KDOT_XZ,
+ [I_KDOT_YZ] = KDOT_YZ,
+ [I_XTDOT_X] = XTDOT_X,
+ [I_XTDOT_Y] = XTDOT_Y,
+ [I_XTDOT_Z] = XTDOT_Z,
+ [I_PHIDOT] = PHIDOT,
+ [I_PHIDOT_DX] = PHIDOT,
+ [I_PHIDOT_DY] = PHIDOT,
+ [I_PHIDOT_DZ] = PHIDOT,
+};
+
+/* the operation (plain value or x/y/z-derivative) to apply during interpolation */
+static const CCTK_INT interp_operation_codes[] = {
+ [I_GTXX] = 0,
+ [I_GTYY] = 0,
+ [I_GTZZ] = 0,
+ [I_GTXY] = 0,
+ [I_GTXZ] = 0,
+ [I_GTYZ] = 0,
+ [I_PHI] = 0,
+ [I_PHI_DX] = 1,
+ [I_PHI_DY] = 2,
+ [I_PHI_DZ] = 3,
+ [I_ATXX] = 0,
+ [I_ATYY] = 0,
+ [I_ATZZ] = 0,
+ [I_ATXY] = 0,
+ [I_ATXZ] = 0,
+ [I_ATYZ] = 0,
+ [I_K] = 0,
+ [I_XTX] = 0,
+ [I_XTY] = 0,
+ [I_XTZ] = 0,
+ [I_BETAX] = 0,
+ [I_BETAY] = 0,
+ [I_BETAZ] = 0,
+ [I_ALPHA] = 0,
+ [I_ALPHA_DX] = 1,
+ [I_ALPHA_DY] = 2,
+ [I_ALPHA_DZ] = 3,
+ [I_ALPHA_DXX] = 11,
+ [I_ALPHA_DYY] = 22,
+ [I_ALPHA_DZZ] = 33,
+ [I_ALPHA_DXY] = 12,
+ [I_ALPHA_DXZ] = 13,
+ [I_ALPHA_DYZ] = 23,
+ [I_KDOT_XX] = 0,
+ [I_KDOT_YY] = 0,
+ [I_KDOT_ZZ] = 0,
+ [I_KDOT_XY] = 0,
+ [I_KDOT_XZ] = 0,
+ [I_KDOT_YZ] = 0,
+ [I_XTDOT_X] = 0,
+ [I_XTDOT_Y] = 0,
+ [I_XTDOT_Z] = 0,
+ [I_PHIDOT] = 0,
+ [I_PHIDOT_DX] = 1,
+ [I_PHIDOT_DY] = 2,
+ [I_PHIDOT_DZ] = 3,
+};
+
+/* interpolate the cactus gridfunctions onto the pseudospectral grid */
+static int interp_geometry(QMSSolver *ctx)
+{
+ QMSSolverPriv *s = ctx->priv;
+ int ret;
+
+ ret = CCTK_InterpGridArrays(s->gh, 3, s->interp_operator, s->interp_params,
+ s->coord_system, NB_COLLOC_POINTS(ctx), CCTK_VARIABLE_REAL,
+ (const void * const *)s->interp_coords, ARRAY_ELEMS(s->interp_vars_indices), s->interp_vars_indices,
+ ARRAY_ELEMS(s->interp_values), s->interp_value_codes, (void * const *)s->interp_values);
+ if (ret < 0)
+ CCTK_WARN(0, "Error interpolating");
+
+ return 0;
+}
+
+/* evaluate the equation coefficients at the collocation points */
+static int calc_eq_coeffs(QMSSolver *ctx)
+{
+ QMSSolverPriv *s = ctx->priv;
+
+//#pragma omp parallel for schedule(dynamic, ms->nb_colloc_points_x)
+ for (int i = 0; i < NB_COLLOC_POINTS(ctx); i++) {
+ const double x = s->interp_coords[0][i];
+ const double z = s->interp_coords[2][i];
+ const int zaxis = x <= EPS;
+
+ double Am[3][3], K[3][3], Km[3][3], Ku[3][3], gtu[3][3];
+ double k2, kij_dij_alpha, k_kdot, k3;
+
+ const double gtxx = s->interp_values[I_GTXX][i];
+ const double gtyy = s->interp_values[I_GTYY][i];
+ const double gtzz = s->interp_values[I_GTZZ][i];
+ const double gtxy = s->interp_values[I_GTXY][i];
+ const double gtxz = s->interp_values[I_GTXZ][i];
+ const double gtyz = s->interp_values[I_GTYZ][i];
+
+ const double gt[3][3] = {{ gtxx, gtxy, gtxz },
+ { gtxy, gtyy, gtyz },
+ { gtxz, gtyz, gtzz }};
+
+ const double Atxx = s->interp_values[I_ATXX][i];
+ const double Atyy = s->interp_values[I_ATYY][i];
+ const double Atzz = s->interp_values[I_ATZZ][i];
+ const double Atxy = s->interp_values[I_ATXY][i];
+ const double Atxz = s->interp_values[I_ATXZ][i];
+ const double Atyz = s->interp_values[I_ATYZ][i];
+
+ const double phi = s->interp_values[I_PHI][i];
+
+ const double phidot = s->interp_values[I_PHIDOT][i];
+ const double phidot_dx = s->interp_values[I_PHIDOT_DX][i];
+ const double phidot_dz = s->interp_values[I_PHIDOT_DZ][i];
+
+ const double At[3][3] = {{ Atxx, Atxy, Atxz },
+ { Atxy, Atyy, Atyz },
+ { Atxz, Atyz, Atzz }};
+
+ const double trK = s->interp_values[I_K][i];
+ const double kdot_xx = s->interp_values[I_KDOT_XX][i];
+ const double kdot_yy = s->interp_values[I_KDOT_YY][i];
+ const double kdot_zz = s->interp_values[I_KDOT_ZZ][i];
+ const double kdot_xy = s->interp_values[I_KDOT_XY][i];
+ const double kdot_xz = s->interp_values[I_KDOT_XZ][i];
+ const double kdot_yz = s->interp_values[I_KDOT_YZ][i];
+
+ const double kdot[3][3] = {{ kdot_xx, kdot_xy, kdot_xz },
+ { kdot_xy, kdot_yy, kdot_yz },
+ { kdot_xz, kdot_yz, kdot_zz }};
+
+ const double alpha = s->interp_values[I_ALPHA][i];
+ const double dx_alpha = s->interp_values[I_ALPHA_DX][i];
+ const double dz_alpha = s->interp_values[I_ALPHA_DZ][i];
+ const double dxx_alpha = s->interp_values[I_ALPHA_DXX][i];
+ const double dzz_alpha = s->interp_values[I_ALPHA_DZZ][i];
+ const double dxz_alpha = s->interp_values[I_ALPHA_DXZ][i];
+
+ const double dij_alpha[3][3] = {{ dxx_alpha, 0, dxz_alpha },
+ { 0, zaxis ? dxx_alpha : dx_alpha / x, 0 },
+ { dxz_alpha, 0, dzz_alpha }};
+
+ const double Xtx = s->interp_values[I_XTX][i];
+ const double Xtz = s->interp_values[I_XTZ][i];
+
+ const double Xtdot_x = s->interp_values[I_XTDOT_X][i];
+ const double Xtdot_z = s->interp_values[I_XTDOT_Z][i];
+
+ const double det = gtxx * gtyy * gtzz + 2 * gtxy * gtyz * gtxz - gtzz * SQR(gtxy) - SQR(gtxz) * gtyy - gtxx * SQR(gtyz);
+
+ // \tilde{γ}^{ij}
+ gtu[0][0] = (gtyy * gtzz - SQR(gtyz)) / det;
+ gtu[1][1] = (gtxx * gtzz - SQR(gtxz)) / det;
+ gtu[2][2] = (gtxx * gtyy - SQR(gtxy)) / det;
+ gtu[0][1] = -(gtxy * gtzz - gtyz * gtxz) / det;
+ gtu[0][2] = (gtxy * gtyz - gtyy * gtxz) / det;
+ gtu[1][2] = -(gtxx * gtyz - gtxy * gtxz) / det;
+ gtu[1][0] = gtu[0][1];
+ gtu[2][0] = gtu[0][2];
+ gtu[2][1] = gtu[1][2];
+
+ // K_{ij}
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++)
+ K[j][k] = At[j][k] / SQR(phi) + gt[j][k] * trK;
+
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++) {
+ double val = 0.0;
+ for (int l = 0; l < 3; l++)
+ val += SQR(phi) * gtu[j][l] * K[l][k];
+ Km[j][k] = val;
+ }
+
+ // K^{ij}
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++) {
+ double val = 0.0;
+ for (int l = 0; l < 3; l++)
+ val += SQR(phi) * gtu[j][l] * Km[k][l];
+ Ku[j][k] = val;
+ }
+
+ // \tilde{A}_{i}^j
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++) {
+ double val = 0.0;
+ for (int l = 0; l < 3; l++)
+ val += gtu[j][l] * At[l][k];
+ Am[j][k] = val;
+ }
+
+ kij_dij_alpha = 0.0;
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++)
+ kij_dij_alpha += Ku[j][k] * dij_alpha[j][k];
+
+ k_kdot = 0.0;
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++)
+ k_kdot += kdot[j][k] * Ku[j][k];
+
+ k3 = 0.0;
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++) {
+ double val = 0.0;
+ for (int l = 0; l < 3; l++)
+ val += Km[k][l] * Ku[l][j];
+ k3 += val * K[j][k];
+ }
+
+ // K_{ij} K^{ij}
+ k2 = 0.0;
+ for (int j = 0; j < 3; j++)
+ for (int k = 0; k < 3; k++)
+ k2 += Km[j][k] * Km[k][j];
+
+ {
+ const double gtuxx = gtu[0][0];
+ const double gtuyy = gtu[1][1];
+ const double gtuzz = gtu[2][2];
+ const double gtuxz = gtu[0][2];
+
+ const double phi_dx = s->interp_values[I_PHI_DX][i];
+ const double phi_dz = s->interp_values[I_PHI_DZ][i];
+
+ const double Xtx = s->interp_values[I_XTX][i];
+ const double Xtz = s->interp_values[I_XTZ][i];
+
+ const double betax = s->interp_values[I_BETAX][i];
+ const double betaz = s->interp_values[I_BETAZ][i];
+
+ const double Xx = SQR(phi) * (Xtx + (phi_dx * gtuxx + phi_dz * gtuxz) / phi);
+ const double Xz = SQR(phi) * (Xtz + (phi_dx * gtuxz + phi_dz * gtuzz) / phi);
+
+ const double Xdot_x = 2 * phi * phidot * Xtx + SQR(phi) * Xtdot_x + phi * (phidot_dx * gtuxx + phidot_dz * gtuxz) -
+ phidot * (phi_dx * gtuxx + phi_dz * gtuxz) + 2 * alpha * (phi_dx * Ku[0][0] + phi_dz * Ku[0][2]) / phi;
+ const double Xdot_z = 2 * phi * phidot * Xtz + SQR(phi) * Xtdot_z + phi * (phidot_dz * gtuzz + phidot_dx * gtuxz) -
+ phidot * (phi_dz * gtuzz + phi_dx * gtuxz) + 2 * alpha * (phi_dz * Ku[2][2] + phi_dx * Ku[0][2]) / phi;
+
+ s->eq_coeffs[PSSOLVE_DIFF_ORDER_20][i] = SQR(phi) * (gtuxx + ((x <= EPS) ? gtuyy : 0.0));
+ s->eq_coeffs[PSSOLVE_DIFF_ORDER_02][i] = SQR(phi) * gtuzz;
+ s->eq_coeffs[PSSOLVE_DIFF_ORDER_11][i] = SQR(phi) * gtuxz * 2;
+ s->eq_coeffs[PSSOLVE_DIFF_ORDER_10][i] = -Xx + ((x > EPS) ? SQR(phi) * gtuyy / x : 0.0);
+ s->eq_coeffs[PSSOLVE_DIFF_ORDER_01][i] = -Xz;
+ s->eq_coeffs[PSSOLVE_DIFF_ORDER_00][i] = -k2;
+
+ s->rhs[i] = -2 * alpha * kij_dij_alpha + Xdot_x * dx_alpha + Xdot_z * dz_alpha +
+ 2 * (k_kdot + 2 * alpha * k3) * alpha;
+ }
+ }
+
+ return 0;
+}
+
+int qms_solver_solve(QMSSolver *ctx)
+{
+ QMSSolverPriv *s = ctx->priv;
+ int ret;
+ int64_t start, totaltime_start;
+
+ totaltime_start = gettime();
+
+ /* interpolate the metric values and construct the quantities we'll need */
+ CCTK_TimerStart("QuasiMaximalSlicing_interp_geometry");
+ start = gettime();
+
+ ret = interp_geometry(ctx);
+
+ s->interp_geometry_time += gettime() - start;
+ s->interp_geometry_count++;
+ CCTK_TimerStop("QuasiMaximalSlicing_interp_geometry");
+ if (ret < 0)
+ return ret;
+
+ CCTK_TimerStart("QuasiMaximalSlicing_calc_eq_coeffs");
+ start = gettime();
+
+ ret = calc_eq_coeffs(ctx);
+
+ s->calc_eq_coeffs_time += gettime() - start;
+ s->calc_eq_coeffs_count++;
+ CCTK_TimerStop("QuasiMaximalSlicing_calc_eq_coeffs");
+ if (ret < 0)
+ return ret;
+
+ ret = qms_pssolve_solve(s->ps_ctx, (const double * const *)s->eq_coeffs,
+ s->rhs, ctx->coeffs);
+ if (ret < 0)
+ return ret;
+
+ for (int i = 0; i < NB_COEFFS(ctx); i++)
+ ctx->coeffs[i] *= s->coeff_scale[i];
+
+ s->solve_count++;
+ s->solve_time += gettime() - totaltime_start;
+
+ return 0;
+}
+
+void qms_solver_print_stats(QMSSolver *ctx)
+{
+ QMSSolverPriv *s = ctx->priv;
+
+ fprintf(stderr,
+ "%g%% interpolate geometry: %lu, "
+ "total time %g s, avg time per call %g ms\n",
+ (double)s->interp_geometry_time * 100 / s->solve_time,
+ s->interp_geometry_count, (double)s->interp_geometry_time / 1e6,
+ (double)s->interp_geometry_time / s->interp_geometry_count / 1e3);
+ fprintf(stderr,
+ "%g%% calc equation coefficients: %lu, "
+ "total time %g s, avg time per call %g ms\n",
+ (double)s->calc_eq_coeffs_time * 100 / s->solve_time,
+ s->calc_eq_coeffs_count, (double)s->calc_eq_coeffs_time / 1e6,
+ (double)s->calc_eq_coeffs_time / s->calc_eq_coeffs_count / 1e3);
+ fprintf(stderr,
+ "%g%% pseudospectral matrix construction: %lu, "
+ "total time %g s, avg time per call %g ms\n",
+ (double)s->ps_ctx->construct_matrix_time * 100 / s->solve_time,
+ s->ps_ctx->construct_matrix_count, (double)s->ps_ctx->construct_matrix_time / 1e6,
+ (double)s->ps_ctx->construct_matrix_time / s->ps_ctx->construct_matrix_count / 1e3);
+ fprintf(stderr,
+ "%g%% BiCGSTAB %lu solves, "
+ "%lu iterations, total time %g s, "
+ "avg iterations per solve %g, avg time per solve %g ms, "
+ "avg time per iteration %g ms\n",
+ (double)s->ps_ctx->cg_time_total * 100 / s->solve_time,
+ s->ps_ctx->cg_solve_count, s->ps_ctx->cg_iter_count, (double)s->ps_ctx->cg_time_total / 1e6,
+ (double)s->ps_ctx->cg_iter_count / s->ps_ctx->cg_solve_count,
+ (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_solve_count / 1e3,
+ (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_iter_count / 1e3);
+ fprintf(stderr,
+ "%g%% LU %lu solves, total time %g s, avg time per solve %g ms\n",
+ (double)s->ps_ctx->lu_solves_time * 100 / s->solve_time,
+ s->ps_ctx->lu_solves_count, (double)s->ps_ctx->lu_solves_time / 1e6,
+ (double)s->ps_ctx->lu_solves_time / s->ps_ctx->lu_solves_count / 1e3);
+}
+
+static void init_opencl(QMSSolver *ctx)
+#if HAVE_OPENCL
+{
+ QMSSolverPriv *s = ctx->priv;
+ int err, count;
+ cl_platform_id platform;
+ cl_context_properties props[3];
+ cl_device_id ocl_device;
+
+ err = clGetPlatformIDs(1, &platform, &count);
+ if (err != CL_SUCCESS || count < 1) {
+ fprintf(stderr, "Could not get an OpenCL platform ID\n");
+ return;
+ }
+
+ err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ocl_device, &count);
+ if (err != CL_SUCCESS || count < 1) {
+ fprintf(stderr, "Could not get an OpenCL device ID\n");
+ return;
+ }
+
+ props[0] = CL_CONTEXT_PLATFORM;
+ props[1] = (cl_context_properties)platform;
+ props[2] = 0;
+
+ s->ocl_ctx = clCreateContext(props, 1, &ocl_device, NULL, NULL, &err);
+ if (err != CL_SUCCESS || !s->ocl_ctx) {
+ fprintf(stderr, "Could not create an OpenCL context\n");
+ return;
+ }
+
+ s->ocl_queue = clCreateCommandQueue(s->ocl_ctx, ocl_device, 0, &err);
+ if (err != CL_SUCCESS || !s->ocl_queue) {
+ fprintf(stderr, "Could not create an OpenCL command queue: %d\n", err);
+ goto fail;
+ }
+
+ err = clblasSetup();
+ if (err != CL_SUCCESS) {
+ fprintf(stderr, "Error setting up clBLAS\n");
+ goto fail;
+ }
+
+ return;
+fail:
+ if (s->ocl_queue)
+ clReleaseCommandQueue(s->ocl_queue);
+ s->ocl_queue = 0;
+
+ if (s->ocl_ctx)
+ clReleaseContext(s->ocl_ctx);
+ s->ocl_ctx = 0;
+}
+#else
+{
+}
+#endif
+
+int qms_solver_init(QMSSolver **pctx,
+ cGH *cctkGH,
+ int basis_order_r, int basis_order_z,
+ double sf, double filter_power, double input_filter_power)
+{
+ QMSSolver *ctx;
+ QMSSolverPriv *s;
+ int ret;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->priv = calloc(1, sizeof(*ctx->priv));
+ if (!ctx->priv)
+ goto fail;
+ s = ctx->priv;
+
+ s->gh = cctkGH;
+
+ ctx->basis[0] = &qms_sb_even_basis;
+#if QMS_POLAR
+ ctx->basis[1] = &qms_cos_even_basis;
+#else
+ ctx->basis[1] = &qms_sb_even_basis;
+#endif
+
+ ctx->nb_coeffs[0] = basis_order_r;
+ ctx->nb_coeffs[1] = basis_order_z;
+
+ ctx->nb_colloc_points[0] = basis_order_r;
+ ctx->nb_colloc_points[1] = basis_order_z;
+
+ if (NB_COLLOC_POINTS(ctx) != NB_COEFFS(ctx))
+ CCTK_WARN(0, "Non-square collocation matrix");
+
+ s->colloc_grid_order[0] = ctx->nb_colloc_points[0];
+ s->colloc_grid_order[1] = ctx->nb_colloc_points[1];
+
+ ret = posix_memalign((void**)&ctx->coeffs, 32, sizeof(*ctx->coeffs) * NB_COEFFS(ctx));
+ ret |= posix_memalign((void**)&s->rhs, 32, sizeof(*s->rhs) * NB_COLLOC_POINTS(ctx));
+ if (ret)
+ goto fail;
+
+ //FIXME
+ scale_factor = 1.0;
+ scale_factor = (64.0 / ctx->basis[0]->colloc_point(s->colloc_grid_order[0], ctx->nb_colloc_points[0] - 1));
+ fprintf(stderr, "scale factor %16.16g\n", scale_factor);
+
+ init_opencl(ctx);
+
+ ret = qms_pssolve_context_alloc(&s->ps_ctx);
+ if (ret < 0)
+ CCTK_WARN(0, "Error allocating the pseudospectral solver");
+
+ s->ps_ctx->basis[0] = ctx->basis[0];
+ s->ps_ctx->basis[1] = ctx->basis[1];
+ s->ps_ctx->solve_order[0] = basis_order_r;
+ s->ps_ctx->solve_order[1] = basis_order_z;
+#if HAVE_OPENCL
+ s->ps_ctx->ocl_ctx = s->ocl_ctx;
+ s->ps_ctx->ocl_queue = s->ocl_queue;
+#endif
+
+ ret = qms_pssolve_context_init(s->ps_ctx);
+ if (ret < 0)
+ CCTK_WARN(0, "Error initializing the pseudospectral solver");
+
+ for (int i = 0; i < MAX(s->ps_ctx->solve_order[0], s->ps_ctx->solve_order[1]); i++) {
+ fprintf(stderr, "%d ", i);
+ if (i < s->ps_ctx->solve_order[0])
+ fprintf(stderr, "%g\t", s->ps_ctx->colloc_grid[0][i]);
+ else
+ fprintf(stderr, "\t\t");
+ if (i < s->ps_ctx->solve_order[1])
+ fprintf(stderr, "%g\t", s->ps_ctx->colloc_grid[1][i]);
+ fprintf(stderr, "\n");
+ }
+
+ for (int i = 0; i < ARRAY_ELEMS(s->eq_coeffs); i++) {
+ ret = posix_memalign((void**)&s->eq_coeffs[i], 32,
+ NB_COLLOC_POINTS(ctx) * sizeof(*s->eq_coeffs[i]));
+ if (ret)
+ goto fail;
+ }
+
+ for (int i = 0; i < ARRAY_ELEMS(s->interp_coords); i++) {
+ ret |= posix_memalign((void**)&s->interp_coords[i], 32,
+ NB_COLLOC_POINTS(ctx) * sizeof(*s->interp_coords[i]));
+ }
+ if (ret)
+ goto fail;
+
+ for (int i = 0; i < ctx->nb_colloc_points[1]; i++) {
+ for (int j = 0; j < ctx->nb_colloc_points[0]; j++) {
+#if QMS_POLAR
+ double phi = s->ps_ctx->colloc_grid[1][i];
+ double r = s->ps_ctx->colloc_grid[0][j];
+
+ double x = r * cos(phi);
+ double z = r * sin(phi);
+#else
+ double x = s->ps_ctx->colloc_grid[0][j];
+ double z = s->ps_ctx->colloc_grid[1][i];
+#endif
+
+ s->interp_coords[0][i * ctx->nb_colloc_points[0] + j] = x;
+ s->interp_coords[1][i * ctx->nb_colloc_points[0] + j] = 0;
+ s->interp_coords[2][i * ctx->nb_colloc_points[0] + j] = z;
+ }
+ }
+
+ ret = posix_memalign((void**)&s->coeff_scale, 32, NB_COEFFS(ctx) * sizeof(*s->coeff_scale));
+ if (ret)
+ goto fail;
+ for (int j = 0; j < ctx->nb_coeffs[1]; j++)
+ for (int i = 0; i < ctx->nb_coeffs[0]; i++) {
+ s->coeff_scale[j * ctx->nb_coeffs[0] + i] = exp(-36.0 * pow((double)i / ctx->nb_coeffs[0], filter_power)) *
+ exp(-36.0 * pow((double)j / ctx->nb_coeffs[1], filter_power));
+ }
+
+ for (int i = 0; i < ARRAY_ELEMS(s->interp_values); i++) {
+ ret = posix_memalign((void**)&s->interp_values[i], 32,
+ NB_COLLOC_POINTS(ctx) * sizeof(*s->interp_values[i]));
+ if (ret)
+ goto fail;
+ s->interp_value_codes[i] = CCTK_VARIABLE_REAL;
+ }
+
+ for (int i = 0; i < ARRAY_ELEMS(metric_vars); i++) {
+ s->interp_vars_indices[i] = CCTK_VarIndex(metric_vars[i]);
+ if (s->interp_vars_indices[i] < 0)
+ CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING, "Error getting the index of variable: %s\n", metric_vars[i]);
+ }
+
+ s->coord_system = CCTK_CoordSystemHandle("cart3d");
+ if (s->coord_system < 0)
+ CCTK_WARN(0, "Error getting the coordinate system");
+
+ s->interp_operator = CCTK_InterpHandle("Lagrange polynomial interpolation (tensor product)");
+ if (s->interp_operator < 0)
+ CCTK_WARN(0, "Error getting the interpolation operator");
+
+ s->interp_params = Util_TableCreateFromString("order=4 want_global_mode=1");
+ if (s->interp_params < 0)
+ CCTK_WARN(0, "Error creating interpolation parameters table");
+
+ ret = Util_TableSetIntArray(s->interp_params, NB_INTERP_VARS,
+ interp_operation_codes, "operation_codes");
+ if (ret < 0)
+ CCTK_WARN(0, "Error setting operation codes");
+
+ ret = Util_TableSetIntArray(s->interp_params, NB_INTERP_VARS,
+ interp_operation_indices, "operand_indices");
+ if (ret < 0)
+ CCTK_WARN(0, "Error setting operand indices");
+
+ CCTK_TimerCreate("QuasiMaximalSlicing_Solve");
+ CCTK_TimerCreate("QuasiMaximalSlicing_Expand");
+ CCTK_TimerCreate("QuasiMaximalSlicing_interp_geometry");
+ CCTK_TimerCreate("QuasiMaximalSlicing_calc_eq_coeffs");
+ CCTK_TimerCreate("QuasiMaximalSlicing_construct_matrix");
+ CCTK_TimerCreate("QuasiMaximalSlicing_solve_LU");
+ CCTK_TimerCreate("QuasiMaximalSlicing_solve_BiCGSTAB");
+
+ *pctx = ctx;
+ return 0;
+fail:
+ qms_solver_free(&ctx);
+ return -ENOMEM;
+}
+
+void qms_solver_free(QMSSolver **pctx)
+{
+ QMSSolver *ctx = *pctx;
+
+ if (!ctx)
+ return;
+
+ if (ctx->priv) {
+ for (int i = 0; i < ARRAY_ELEMS(ctx->priv->interp_coords); i++)
+ free(ctx->priv->interp_coords[i]);
+ for (int i = 0; i < ARRAY_ELEMS(ctx->priv->interp_values); i++)
+ free(ctx->priv->interp_values[i]);
+ for (int i = 0; i < ARRAY_ELEMS(ctx->priv->eq_coeffs); i++)
+ free(ctx->priv->eq_coeffs[i]);
+ free(ctx->priv->rhs);
+ free(ctx->priv->coeff_scale);
+
+ qms_pssolve_context_free(&ctx->priv->ps_ctx);
+
+#if HAVE_OPENCL
+ if (ctx->priv->ocl_queue)
+ clReleaseCommandQueue(ctx->priv->ocl_queue);
+ if (ctx->priv->ocl_ctx)
+ clReleaseContext(ctx->priv->ocl_ctx);
+#endif
+ }
+
+ free(ctx->priv);
+
+ free(ctx->coeffs);
+
+ free(ctx);
+ *pctx = NULL;
+}