summaryrefslogtreecommitdiff
path: root/src/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/md.c')
-rw-r--r--src/md.c573
1 files changed, 573 insertions, 0 deletions
diff --git a/src/md.c b/src/md.c
new file mode 100644
index 0000000..21e38fc
--- /dev/null
+++ b/src/md.c
@@ -0,0 +1,573 @@
+#include "common.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <float.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cblas.h>
+
+#include "cctk.h"
+#include "cctk_Arguments.h"
+#include "cctk_Parameters.h"
+#include "cctk_Timers.h"
+#include "util_Table.h"
+
+#include "md.h"
+#include "md_solve.h"
+#include "threadpool.h"
+
+typedef struct EvalContext {
+ struct MDContext *md;
+ struct CoordPatch *cp;
+ const double *x;
+ const double *z;
+ double *W;
+
+ const double *coeffs;
+ double nb_coeffs[2];
+
+ double *eval_tmp[2];
+
+ unsigned int x_idx_start;
+ unsigned int x_idx_end;
+ unsigned int z_idx_start;
+ unsigned int z_idx_end;
+} EvalContext;
+
+/* precomputed values for a given refined grid */
+typedef struct CoordPatch {
+ CCTK_REAL origin[3];
+ CCTK_INT delta[3];
+ CCTK_INT size[3];
+
+ // basis values on the grid
+ double *basis_val_r;
+ double *basis_val_z;
+
+ double *transform_z;
+ double *transform_matrix;
+ double *transform_matrix1;
+ double *transform_matrix2;
+ double *transform_matrix3;
+ double *transform_tmp;
+
+ int y_idx;
+
+ int nb_threads;
+ ThreadPoolContext *tp;
+ EvalContext *ec;
+} CoordPatch;
+
+struct MDContext {
+ MDSolver *solver;
+ cGH *gh;
+ ThreadPoolContext *tp;
+
+ struct {
+ double time;
+ double *coeffs;
+ } solution_cache[8];
+ int nb_solutions;
+
+ double *coeffs_eval;
+
+ uint64_t grid_expand_count;
+ uint64_t grid_expand_time;
+
+ CoordPatch *patches;
+ int nb_patches;
+};
+
+/* get an approximate "main" frequency component in a basis function */
+static double calc_basis_freq(const MDBasisSetContext *b, int order)
+{
+ return md_basis_colloc_point(b, order, 1);
+}
+
+static CoordPatch *get_coord_patch(MDContext *md,
+ CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z,
+ double scale_factor, double scale_power)
+{
+ cGH *cctkGH = md->gh;
+
+ CoordPatch *cp;
+ int64_t grid_size;
+ int i, block_size;
+ const char *nb_threads;
+
+ for (int i = 0; i < md->nb_patches; i++) {
+ cp = &md->patches[i];
+
+ if (cp->origin[0] == md->gh->cctk_origin_space[0] &&
+ cp->origin[1] == md->gh->cctk_origin_space[1] &&
+ cp->origin[2] == md->gh->cctk_origin_space[2] &&
+ cp->size[0] == md->gh->cctk_lsh[0] &&
+ cp->size[1] == md->gh->cctk_lsh[1] &&
+ cp->size[2] == md->gh->cctk_lsh[2] &&
+ cp->delta[0] == md->gh->cctk_levfac[0] &&
+ cp->delta[1] == md->gh->cctk_levfac[1] &&
+ cp->delta[2] == md->gh->cctk_levfac[2])
+ return cp;
+ }
+
+ grid_size = cctkGH->cctk_lsh[0] * cctkGH->cctk_lsh[1] * cctkGH->cctk_lsh[2];
+
+ /* create a new patch */
+ md->patches = realloc(md->patches, sizeof(*md->patches) * (md->nb_patches + 1));
+ cp = &md->patches[md->nb_patches];
+
+ memset(cp, 0, sizeof(*cp));
+
+ memcpy(cp->origin, md->gh->cctk_origin_space, sizeof(cp->origin));
+ memcpy(cp->size, md->gh->cctk_lsh, sizeof(cp->size));
+ memcpy(cp->delta, md->gh->cctk_levfac, sizeof(cp->delta));
+
+ for (i = 0; i < cp->size[1]; i++)
+ if (fabs(y[CCTK_GFINDEX3D(cctkGH, 0, i, 0)]) < 1e-8) {
+ cp->y_idx = i;
+ break;
+ }
+ if (i == cp->size[1])
+ CCTK_WARN(0, "The grid does not include y==0");
+
+#if MD_POLAR || 1
+ posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * md->solver->nb_coeffs[0] * cp->size[0] * cp->size[2]);
+ posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * md->solver->nb_coeffs[1] * cp->size[0] * cp->size[2]);
+ posix_memalign((void**)&cp->transform_matrix2, 32, sizeof(*cp->transform_matrix2) * md->solver->nb_coeffs[0] * cp->size[0] * cp->size[2]);
+ posix_memalign((void**)&cp->transform_matrix3, 32, sizeof(*cp->transform_matrix3) * md->solver->nb_coeffs[1] * cp->size[0] * cp->size[2]);
+#pragma omp parallel for
+ for (int j = 0; j < cp->size[2]; j++) {
+ double zz = z[CCTK_GFINDEX3D(md->gh, 0, 0, j)];
+
+ for (int i = 0; i < cp->size[0]; i++) {
+ const int idx_grid = j * cp->size[0] + i;
+
+ double xx = x[CCTK_GFINDEX3D(md->gh, i, 0, 0)];
+ double rr = sqrt(SQR(xx) + SQR(zz));
+
+ double coord0 = xx;
+ double coord1 = zz;
+
+ //for (int k = 0; k < md->nb_coeffs_z; k++)
+ // for (int l = 0; l < md->nb_coeffs_x; l++) {
+ // const int idx_coeff = k * md->nb_coeffs_x + l;
+ // cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * idx_coeff] = md->basis->eval(r, l) * md->basis1->eval(phi, k);
+ // }
+ for (int k = 0; k < md->solver->nb_coeffs[0]; k++) {
+ double dx = calc_basis_freq(md->solver->basis[0][0], k);
+ double r0 = MIN(60.0, dx * scale_factor);
+ double fact = exp(-36.0 * pow(rr / r0, scale_power));
+
+ cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = md_basis_eval(md->solver->basis[0][0], MD_BASIS_EVAL_TYPE_VALUE, coord0, k) * fact;
+ }
+ for (int k = 0; k < md->solver->nb_coeffs[1]; k++) {
+ double dx = calc_basis_freq(md->solver->basis[0][1], k);
+ double r0 = MIN(60.0, dx * scale_factor);
+ double fact = exp(-36.0 * pow(rr / r0, scale_power));
+
+ cp->transform_matrix1[idx_grid * md->solver->nb_coeffs[1] + k] = md_basis_eval(md->solver->basis[0][1], MD_BASIS_EVAL_TYPE_VALUE, coord1, k) * fact;
+ }
+ for (int k = 0; k < md->solver->nb_coeffs[0]; k++) {
+ double dx = calc_basis_freq(md->solver->basis[1][0], k);
+ double r0 = MIN(60.0, dx * scale_factor);
+ double fact = exp(-36.0 * pow(rr / r0, scale_power));
+
+ cp->transform_matrix2[idx_grid + cp->size[0] * cp->size[2] * k] = md_basis_eval(md->solver->basis[1][0], MD_BASIS_EVAL_TYPE_VALUE, coord0, k) * fact;
+ }
+ for (int k = 0; k < md->solver->nb_coeffs[1]; k++) {
+ double dx = calc_basis_freq(md->solver->basis[1][1], k);
+ double r0 = MIN(60.0, dx * scale_factor);
+ double fact = exp(-36.0 * pow(rr / r0, scale_power));
+
+ cp->transform_matrix3[idx_grid * md->solver->nb_coeffs[1] + k] = md_basis_eval(md->solver->basis[1][1], MD_BASIS_EVAL_TYPE_VALUE, coord1, k) * fact;
+ }
+ }
+ }
+ posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * md->solver->nb_coeffs[1]);
+#else
+ posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * md->solver->nb_coeffs[0] * md->gh->cctk_lsh[1] * md->gh->cctk_lsh[0]);
+ for (int j = 0; j < md->gh->cctk_lsh[1]; j++)
+ for (int i = 0; i < md->gh->cctk_lsh[0]; i++) {
+ CCTK_REAL xx = x[CCTK_GFINDEX3D(md->gh, i, j, 0)];
+ CCTK_REAL yy = y[CCTK_GFINDEX3D(md->gh, i, j, 0)];
+ CCTK_REAL r = sqrt(SQR(xx) + SQR(yy));
+
+ for (int k = 0; k < md->solver->nb_coeffs[0]; k++)
+ //cp->basis_val_r [(j * md->gh->cctk_lsh[0] + i) * md->nb_coeffs_x + k] = md->basis->eval(r, k);
+ cp->basis_val_r [(j * md->gh->cctk_lsh[0] + i) + md->gh->cctk_lsh[1] * md->gh->cctk_lsh[0] * k] = md->solver->basis[0]->eval(r, k);
+ }
+
+ posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * md->solver->nb_coeffs[1] * md->gh->cctk_lsh[2]);
+ for (int i = 0; i < md->gh->cctk_lsh[2]; i++) {
+ CCTK_REAL zz = z[CCTK_GFINDEX3D(md->gh, 0, 0, i)];
+ for (int j = 0; j < md->solver->nb_coeffs[1]; j++)
+ cp->basis_val_z [i * md->solver->nb_coeffs[1] + j] = md->solver->basis[0]->eval(fabs(zz), j);
+ //cp->basis_val_z [i + md->gh->cctk_lsh[2] * j] = md->basis->eval(zz, j);
+ }
+ posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * md->solver->nb_coeffs[0]);
+#endif
+
+#if 0
+ nb_threads = getenv("OMP_NUM_THREADS");
+ if (nb_threads)
+ cp->nb_threads = atoi(nb_threads);
+ if (cp->nb_threads <= 0)
+ cp->nb_threads = 1;
+ md_threadpool_init(&cp->tp, cp->nb_threads);
+ cp->ec = calloc(cp->nb_threads, sizeof(*cp->ec));
+
+ block_size = (md->gh->cctk_lsh[2] + cp->nb_threads - 1) / cp->nb_threads;
+
+ for (int i = 0; i < cp->nb_threads; i++) {
+ EvalContext *ec = &cp->ec[i];
+
+ ec->md = md;
+
+ ec->nb_coeffs[0] = md->solver->nb_coeffs[0];
+ ec->nb_coeffs[1] = md->solver->nb_coeffs[1];
+
+ posix_memalign((void**)&ec->eval_tmp[0], 32, sizeof(*ec->eval_tmp[0]) * ec->nb_coeffs[0]);
+ posix_memalign((void**)&ec->eval_tmp[1], 32, sizeof(*ec->eval_tmp[1]) * ec->nb_coeffs[1]);
+
+ ec->x_idx_start = 0;
+ ec->x_idx_end = md->gh->cctk_lsh[0];
+
+ ec->z_idx_start = block_size * i;
+ ec->z_idx_end = MIN(block_size * (i + 1), md->gh->cctk_lsh[2]);
+ }
+#endif
+
+ md->nb_patches++;
+ return cp;
+}
+
+static MDContext *md_context;
+
+static int context_init(cGH *cctkGH)
+{
+ int threads_type;
+ const int *threads = CCTK_ParameterGet("num_threads", "Carpet", &threads_type);
+
+ MDContext *md;
+ int ret;
+
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ md = calloc(1, sizeof(*md));
+ if (!md)
+ return -ENOMEM;
+
+ md->gh = cctkGH;
+
+ ret = md_threadpool_init(&md->tp, *threads);
+ if (ret < 0)
+ return ret;
+
+ ret = md_solver_init(&md->solver, cctkGH, md->tp, 2,
+ (unsigned int [2][2]){ { basis_order_r, basis_order_z },
+ { basis_order_r, basis_order_z }},
+ scale_factor, filter_power, 0.0);
+ if (ret < 0)
+ return ret;
+
+ ret = posix_memalign((void**)&md->coeffs_eval, 32,
+ basis_order_r * basis_order_z * sizeof(*md->coeffs_eval));
+ if (ret)
+ return -ENOMEM;
+
+ for (int i = 0; i < ARRAY_ELEMS(md->solution_cache); i++) {
+ ret = posix_memalign((void**)&md->solution_cache[i].coeffs, 32,
+ 2 * basis_order_r * basis_order_z * sizeof(*md->solution_cache[i].coeffs));
+ if (ret)
+ return -ENOMEM;
+ }
+
+ md_context = md;
+
+ return 0;
+}
+
+void minimal_distortion_solve(CCTK_ARGUMENTS)
+{
+ MDContext *md;
+
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ double time;
+
+ if (!md_context)
+ context_init(cctkGH);
+
+ md = md_context;
+
+ time = cctkGH->cctk_time / md->gh->cctk_delta_time;
+
+ //if (md->gh->cctk_levfac[0] != 1 || fabs(time - ceilf(time)) > 1e-8 ||
+ // (md->nb_solutions && md->solution_cache[md->nb_solutions - 1].time == cctkGH->cctk_time))
+ // return;
+ //if (md->gh->cctk_time < 10.0)
+ // return;
+
+ CCTK_TimerStart("MinimalDistortion_Solve");
+ md_solver_solve(md->solver);
+ CCTK_TimerStop("MinimalDistortion_Solve");
+
+ fprintf(stderr, "%d md solve: time %g %g %g\n", md->gh->cctk_levfac[0], md->gh->cctk_time, time, md->solver->coeffs[0]);
+ if (1) {
+ double *tmp;
+ if (md->nb_solutions == ARRAY_ELEMS(md->solution_cache)) {
+ tmp = md->solution_cache[0].coeffs;
+ memmove(md->solution_cache, md->solution_cache + 1, sizeof(md->solution_cache[0]) * (ARRAY_ELEMS(md->solution_cache) - 1));
+ } else {
+ md->nb_solutions++;
+ tmp = md->solution_cache[md->nb_solutions - 1].coeffs;
+ }
+ md->solution_cache[md->nb_solutions - 1].coeffs = md->solver->coeffs;
+ md->solution_cache[md->nb_solutions - 1].time = md->gh->cctk_time;
+
+ md->solver->coeffs = tmp;
+ }
+}
+
+double md_scalarproduct_metric_avx(size_t len1, size_t len2, const double *mat,
+ const double *vec1, const double *vec2);
+
+static double md_scalarproduct_metric_c(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2)
+{
+ double val = 0.0;
+ for (int l = 0; l < len2; l++) {
+ double tmp = 0.0;
+ for (int m = 0; m < len1; m++)
+ tmp += mat[l * len1 + m] * vec1[m];
+
+ val += tmp * vec2[l];
+ }
+ return val;
+}
+
+#if 0
+static void md_eval(void *arg,
+ unsigned int job_id, unsigned int nb_jobs,
+ unsigned int thread_idx, unsigned int nb_threads)
+{
+ EvalContext *e = (EvalContext*)arg + job_id;
+ CoordPatch *cp = e->cp;
+ MDContext *md = e->md;
+ const cGH *gh = e->md->gh;
+ double *W = e->W;
+
+ for (int k = e->z_idx_start; k < e->z_idx_end; k++) {
+ for (int i = e->x_idx_start; i < e->x_idx_end; i++) {
+ int idx = CCTK_GFINDEX3D(gh, i, cp->y_idx, k);
+ double xx = e->x[idx];
+ double zz = e->z[idx];
+ double r = sqrt(SQR(xx) + SQR(zz));
+ double phi = atan2(zz, xx);
+
+ double *basis_vec1 = e->eval_tmp[0];
+ double *basis_vec2 = e->eval_tmp[1];
+
+ for (int l = 0; l < e->nb_coeffs[0]; l++)
+ basis_vec1[l] = md->solver->basis[0]->eval(r, l);
+ for (int l = 0; l < e->nb_coeffs[0]; l++)
+ basis_vec2[l] = md->solver->basis[1]->eval(phi, l);
+
+ W[idx] = md_scalarproduct_metric_avx(e->nb_coeffs[0], e->nb_coeffs[1], e->coeffs,
+ basis_vec1, basis_vec2);
+ }
+ }
+}
+#endif
+
+void minimal_distortion_eval(CCTK_ARGUMENTS)
+{
+ MDContext *md;
+
+ CoordPatch *cp;
+
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ double *beta1 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::beta1");
+ double *beta3 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::beta3");
+
+ double time;
+
+ int64_t expand_start;
+
+ double *coeffs = NULL;
+ int i, ret;
+
+ if (!md_context)
+ context_init(cctkGH);
+
+ time = cctkGH->cctk_time;
+
+ md = md_context;
+
+ cp = get_coord_patch(md, x, y, z, scale_factor, scale_power);
+
+#if 1
+ //coeffs = md->coeffs;
+ coeffs = md->solution_cache[md->nb_solutions - 1].coeffs;
+#elif 0
+ if (time < 10.0) {
+ return;
+ } else if (time < 11.0) {
+ double fact = exp(-36.0 * pow((10.0 - time), 4.0));
+ double *coeffs_src = md->solution_cache[md->nb_solutions - 1].coeffs;
+
+ coeffs = md->coeffs_eval;
+ for (int i = 0; i < md->solver->nb_coeffs[0] * md->solver->nb_coeffs[1] * 2; i++)
+ coeffs[i] = coeffs_src[i] * fact;
+ } else
+ coeffs = md->solution_cache[md->nb_solutions - 1].coeffs;
+
+#else
+ coeffs = md->coeffs_eval;
+
+ if (cctkGH->cctk_levfac[0] < 1 || md->nb_solutions < 2) {
+ memset(coeffs, 0, sizeof(*coeffs) * md->solver->nb_coeffs[0] * md->solver->nb_coeffs[1]);
+ //fprintf(stderr, "md eval: time %g zero\n", md->gh->cctk_time);
+ } else {
+ double *coeffs0 = md->solution_cache[md->nb_solutions - 2].coeffs;
+ double *coeffs1 = md->solution_cache[md->nb_solutions - 1].coeffs;
+ double time0 = md->solution_cache[md->nb_solutions - 2].time;
+ double time1 = md->solution_cache[md->nb_solutions - 1].time;
+
+ double fact = 1.0;
+
+ //if (time < 9.0)
+ // fact = 1.0;
+ //else
+ // fact = exp(-36.0 * pow((time - 9.0), 4.0));
+ //else if (time < 0.1)
+ // fact = 0.0;
+ //else
+ // fact = (1.0 - exp(-pow((time - 0.0) / 0.25, 4.0)));
+ //fact = 1.0;
+
+ //fprintf(stderr, "md eval: time %g interp from %g %g %g\n", md->gh->cctk_time, time0, time1, fact);
+
+ for (int i = 0; i < 2 * md->solver->nb_coeffs[0] * md->solver->nb_coeffs[1]; i++)
+ coeffs[i] = (coeffs1[i] * (time - time0) / (time1 - time0) + coeffs0[i] * (time - time1) / (time0 - time1)) * fact;
+
+ }
+#endif
+
+ if (export_coeffs) {
+ memcpy(betax_coeffs, coeffs, sizeof(*coeffs) * md->solver->nb_coeffs[0] * md->solver->nb_coeffs[1]);
+ memcpy(betaz_coeffs, coeffs + md->solver->nb_coeffs[0] * md->solver->nb_coeffs[1],
+ sizeof(*coeffs) * md->solver->nb_coeffs[0] * md->solver->nb_coeffs[1]);
+ }
+
+ CCTK_TimerStart("MinimalDistortion_Expand");
+ expand_start = gettime();
+#if 0
+#pragma omp parallel for
+ for (int k = 0; k < cctk_lsh[2]; k++) {
+ for (int i = 0; i < cctk_lsh[0]; i++) {
+ int idx = CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, k);
+ double xx = x[idx];
+ double zz = z[idx];
+ double r = sqrt(SQR(xx) + SQR(zz));
+ double phi = atan2(zz, xx);
+
+ double val = 0.0;
+
+ for (int l = 0; l < md->nb_coeffs_z; l++) {
+ double tmp = 0.0;
+ for (int m = 0; m < md->nb_coeffs_x; m++) {
+ const int idx_coeff = l * md->nb_coeffs_x + m;
+ tmp += coeffs[idx_coeff] * md->basis->eval(r, m);
+ }
+ val += tmp * md->basis1->eval(phi, l);
+ }
+
+ W[idx] = val;
+ }
+ }
+#elif 0
+ {
+ for (int i = 0; i < cp->nb_threads; i++) {
+ cp->ec[i].cp = cp;
+ cp->ec[i].x = x;
+ cp->ec[i].z = z;
+ cp->ec[i].W = W;
+ cp->ec[i].coeffs = coeffs;
+ }
+ md_threadpool_execute(cp->tp, cp->nb_threads, md_eval, cp->ec);
+ }
+#elif MD_POLAR || 1
+ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ cctk_lsh[0] * cctk_lsh[2], md->solver->nb_coeffs[1], md->solver->nb_coeffs[0],
+ 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2],
+ coeffs, md->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]);
+#pragma omp parallel for
+ for (int j = 0; j < cctk_lsh[2]; j++)
+ for (int i = 0; i < cctk_lsh[0]; i++) {
+ const int idx_grid = j * cctk_lsh[0] + i;
+ const double val = cblas_ddot(md->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * md->solver->nb_coeffs[1], 1,
+ cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]);
+ beta1[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = val;
+ }
+ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ cctk_lsh[0] * cctk_lsh[2], md->solver->nb_coeffs[1], md->solver->nb_coeffs[0],
+ 1.0, cp->transform_matrix2, cctk_lsh[0] * cctk_lsh[2],
+ coeffs + md->solver->nb_coeffs[0] * md->solver->nb_coeffs[1],
+ md->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]);
+#pragma omp parallel for
+ for (int j = 0; j < cctk_lsh[2]; j++)
+ for (int i = 0; i < cctk_lsh[0]; i++) {
+ const int idx_grid = j * cctk_lsh[0] + i;
+ const double val = cblas_ddot(md->solver->nb_coeffs[1], cp->transform_matrix3 + idx_grid * md->solver->nb_coeffs[1], 1,
+ cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]);
+ beta3[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = val;
+ }
+#else
+ memset(W, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*W));
+ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ md->solver->nb_coeffs[0], cctk_lsh[2], md->solver->nb_coeffs[1], 1.0,
+ coeffs, md->solver->nb_coeffs[0], cp->basis_val_z, md->solver->nb_coeffs[1],
+ 0.0, cp->transform_z, md->solver->nb_coeffs[0]);
+ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], md->solver->nb_coeffs[0], 1.0,
+ cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, md->solver->nb_coeffs[0],
+ 1.0, W, cctk_lsh[0] * cctk_lsh[1]);
+#endif
+
+ md->grid_expand_time += gettime() - expand_start;
+ md->grid_expand_count++;
+
+ CCTK_TimerStop("MinimalDistortion_Expand");
+
+ /* print stats */
+ if (!(md->grid_expand_count & 255)) {
+ fprintf(stderr, "Minimal distortion stats:\n");
+
+ md_solver_print_stats(md->solver);
+
+ fprintf(stderr,
+ "%lu evals: total time %g s, avg time per call %g md\n",
+ md->grid_expand_count, (double)md->grid_expand_time / 1e6,
+ (double)md->grid_expand_time / md->grid_expand_count / 1e3);
+ }
+}
+
+void minimal_distortion_init(CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ if (!md_context)
+ context_init(cctkGH);
+}