diff options
Diffstat (limited to 'src/md_solve.c')
-rw-r--r-- | src/md_solve.c | 818 |
1 files changed, 818 insertions, 0 deletions
diff --git a/src/md_solve.c b/src/md_solve.c new file mode 100644 index 0000000..c7fa329 --- /dev/null +++ b/src/md_solve.c @@ -0,0 +1,818 @@ +/* + * Minimal distortion -- actual solver code + * Copyright (C) 2016 Anton Khirnov <anton@khirnov.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "common.h" + +#include <errno.h> +#include <math.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if HAVE_OPENCL +#include <cl.h> +#include <clBLAS.h> +#endif + +#include "cctk.h" +#include "cctk_Timers.h" +#include "util_Table.h" + +#include "basis.h" +#include "pssolve.h" +#include "md_solve.h" +#include "threadpool.h" + +#define NB_COEFFS(md) (md->nb_coeffs[0] * md->nb_coeffs[1]) +#define NB_COLLOC_POINTS(md) (md->nb_colloc_points[0] * md->nb_colloc_points[1]) + +/* indices (in our code, not cactus structs) of the grid functions which we'll need to + * interpolate on the pseudospectral grid */ +enum MetricVars { + GTXX = 0, + GTYY, + GTZZ, + GTXY, + GTXZ, + GTYZ, + PHI, + ATXX, + ATYY, + ATZZ, + ATXY, + ATXZ, + ATYZ, + XTX, + XTY, + XTZ, + ALPHA, + TRK, + NB_METRIC_VARS, +}; + +/* indices of the interpolated values of the above grid functions and their derivatives */ +enum InterpMetricVars { + I_GTXX = 0, + I_GTYY, + I_GTZZ, + I_GTXY, + I_GTXZ, + I_GTYZ, + I_GTXX_DX, + I_GTYY_DX, + I_GTZZ_DX, + I_GTXZ_DX, + I_GTXX_DZ, + I_GTYY_DZ, + I_GTZZ_DZ, + I_GTXZ_DZ, + I_GTXX_DXX, + I_GTYY_DXX, + I_GTZZ_DXX, + I_GTXZ_DXX, + I_GTXX_DXZ, + I_GTYY_DXZ, + I_GTZZ_DXZ, + I_GTXZ_DXZ, + I_GTXX_DZZ, + I_GTYY_DZZ, + I_GTZZ_DZZ, + I_GTXZ_DZZ, + I_PHI, + I_PHI_DX, + I_PHI_DY, + I_PHI_DZ, + I_PHI_DXX, + I_PHI_DZZ, + I_PHI_DXZ, + I_ATXX, + I_ATYY, + I_ATZZ, + I_ATXY, + I_ATXZ, + I_ATYZ, + I_ATXX_DX, + I_ATYY_DX, + I_ATZZ_DX, + I_ATXZ_DX, + I_ATXX_DZ, + I_ATYY_DZ, + I_ATZZ_DZ, + I_ATXZ_DZ, + I_XTX, + I_XTY, + I_XTZ, + I_ALPHA, + I_ALPHA_DX, + I_ALPHA_DY, + I_ALPHA_DZ, + I_TRK, + I_TRK_DX, + I_TRK_DZ, + NB_INTERP_VARS, +}; + +/* per-equation state */ +typedef struct MDEquationContext { + double *interp_coords[3]; + double *interp_values[NB_INTERP_VARS]; + + /* eq_coeffs[i][j] is an array of coefficients at the collocation points + * for j-th derivative of i-th unknown function */ + double *(*eq_coeffs)[PSSOLVE_DIFF_ORDER_NB]; + + double *rhs; +} MDEquationContext; + +struct MDSolverPriv { + PSSolveContext *ps_ctx; + cGH *gh; + + MDEquationContext *eqs; + + int colloc_grid_order[2]; + + double *rhs; + + double *coeff_scale; + + // interpolation parameters + int coord_system; + int interp_operator; + int interp_params; + + CCTK_REAL *interp_coords[3]; + + int interp_vars_indices[NB_METRIC_VARS]; + CCTK_REAL *interp_values[NB_INTERP_VARS]; + CCTK_INT interp_value_codes[NB_INTERP_VARS]; + +#if HAVE_OPENCL + // OpenCL / CLBLAS stuff + cl_context ocl_ctx; + cl_command_queue ocl_queue; +#endif + + ThreadPoolContext *tp; + ThreadPoolContext *tp_internal; + + uint64_t solve_count; + uint64_t solve_time; + + uint64_t interp_geometry_count; + uint64_t interp_geometry_time; + + uint64_t calc_eq_coeffs_count; + uint64_t calc_eq_coeffs_time; +}; + +typedef struct MDCalcEqThread { + MDSolver *ctx; + MDEquationContext *eq_ctx; + size_t block_size; +} MDCalcEqThread; + +/* mapping between our indices and thorn names */ +static const char *metric_vars[] = { + [GTXX] = "ML_BSSN::gt11", + [GTYY] = "ML_BSSN::gt22", + [GTZZ] = "ML_BSSN::gt33", + [GTXY] = "ML_BSSN::gt12", + [GTXZ] = "ML_BSSN::gt13", + [GTYZ] = "ML_BSSN::gt23", + [ATXX] = "ML_BSSN::At11", + [ATYY] = "ML_BSSN::At22", + [ATZZ] = "ML_BSSN::At33", + [ATXY] = "ML_BSSN::At12", + [ATXZ] = "ML_BSSN::At13", + [ATYZ] = "ML_BSSN::At23", + [PHI] = "ML_BSSN::phi", + [XTX] = "ML_BSSN::Xt1", + [XTY] = "ML_BSSN::Xt2", + [XTZ] = "ML_BSSN::Xt3", + [ALPHA] = "ML_BSSN::alpha", + [TRK] = "ML_BSSN::trK", +}; + +/* mapping between the cactus grid values and interpolated values */ +static const CCTK_INT interp_operation_indices[] = { + [I_GTXX] = GTXX, + [I_GTYY] = GTYY, + [I_GTZZ] = GTZZ, + [I_GTXY] = GTXY, + [I_GTXZ] = GTXZ, + [I_GTYZ] = GTYZ, + [I_GTXX_DX] = GTXX, + [I_GTYY_DX] = GTYY, + [I_GTZZ_DX] = GTZZ, + [I_GTXZ_DX] = GTXZ, + [I_GTXX_DZ] = GTXX, + [I_GTYY_DZ] = GTYY, + [I_GTZZ_DZ] = GTZZ, + [I_GTXZ_DZ] = GTXZ, + [I_GTXX_DXX] = GTXX, + [I_GTYY_DXX] = GTYY, + [I_GTZZ_DXX] = GTZZ, + [I_GTXZ_DXX] = GTXZ, + [I_GTXX_DXZ] = GTXX, + [I_GTYY_DXZ] = GTYY, + [I_GTZZ_DXZ] = GTZZ, + [I_GTXZ_DXZ] = GTXZ, + [I_GTXX_DZZ] = GTXX, + [I_GTYY_DZZ] = GTYY, + [I_GTZZ_DZZ] = GTZZ, + [I_GTXZ_DZZ] = GTXZ, + [I_PHI] = PHI, + [I_PHI_DX] = PHI, + [I_PHI_DY] = PHI, + [I_PHI_DZ] = PHI, + [I_PHI_DXX] = PHI, + [I_PHI_DZZ] = PHI, + [I_PHI_DXZ] = PHI, + [I_ATXX] = ATXX, + [I_ATYY] = ATYY, + [I_ATZZ] = ATZZ, + [I_ATXY] = ATXY, + [I_ATXZ] = ATXZ, + [I_ATYZ] = ATYZ, + [I_ATXX_DX] = ATXX, + [I_ATYY_DX] = ATYY, + [I_ATZZ_DX] = ATZZ, + [I_ATXZ_DX] = ATXZ, + [I_ATXX_DZ] = ATXX, + [I_ATYY_DZ] = ATYY, + [I_ATZZ_DZ] = ATZZ, + [I_ATXZ_DZ] = ATXZ, + [I_XTX] = XTX, + [I_XTY] = XTY, + [I_XTZ] = XTZ, + [I_ALPHA] = ALPHA, + [I_ALPHA_DX] = ALPHA, + [I_ALPHA_DY] = ALPHA, + [I_ALPHA_DZ] = ALPHA, + [I_TRK] = TRK, + [I_TRK_DX] = TRK, + [I_TRK_DZ] = TRK, +}; + +/* the operation (plain value or x/y/z-derivative) to apply during interpolation */ +static const CCTK_INT interp_operation_codes[] = { + [I_GTXX] = 0, + [I_GTYY] = 0, + [I_GTZZ] = 0, + [I_GTXY] = 0, + [I_GTXZ] = 0, + [I_GTYZ] = 0, + [I_GTXX_DX] = 1, + [I_GTYY_DX] = 1, + [I_GTZZ_DX] = 1, + [I_GTXZ_DX] = 1, + [I_GTXX_DZ] = 3, + [I_GTYY_DZ] = 3, + [I_GTZZ_DZ] = 3, + [I_GTXZ_DZ] = 3, + [I_GTXX_DXX] = 11, + [I_GTYY_DXX] = 11, + [I_GTZZ_DXX] = 11, + [I_GTXZ_DXX] = 11, + [I_GTXX_DXZ] = 13, + [I_GTYY_DXZ] = 13, + [I_GTZZ_DXZ] = 13, + [I_GTXZ_DXZ] = 13, + [I_GTXX_DZZ] = 33, + [I_GTYY_DZZ] = 33, + [I_GTZZ_DZZ] = 33, + [I_GTXZ_DZZ] = 33, + [I_PHI] = 0, + [I_PHI_DX] = 1, + [I_PHI_DY] = 2, + [I_PHI_DZ] = 3, + [I_PHI_DXX] = 11, + [I_PHI_DZZ] = 33, + [I_PHI_DXZ] = 13, + [I_ATXX] = 0, + [I_ATYY] = 0, + [I_ATZZ] = 0, + [I_ATXY] = 0, + [I_ATXZ] = 0, + [I_ATYZ] = 0, + [I_ATXX_DX] = 1, + [I_ATYY_DX] = 1, + [I_ATZZ_DX] = 1, + [I_ATXZ_DX] = 1, + [I_ATXX_DZ] = 3, + [I_ATYY_DZ] = 3, + [I_ATZZ_DZ] = 3, + [I_ATXZ_DZ] = 3, + [I_XTX] = 0, + [I_XTY] = 0, + [I_XTZ] = 0, + [I_ALPHA] = 0, + [I_ALPHA_DX] = 1, + [I_ALPHA_DY] = 2, + [I_ALPHA_DZ] = 3, + [I_TRK] = 0, + [I_TRK_DX] = 1, + [I_TRK_DZ] = 3, +}; + +/* interpolate the cactus gridfunctions onto the pseudospectral grid */ +static int interp_geometry(MDSolver *ctx) +{ + MDSolverPriv *s = ctx->priv; + int ret; + + for (int i = 0; i < ctx->nb_equations; i++) { + MDEquationContext *eq_ctx = &s->eqs[i]; + + ret = CCTK_InterpGridArrays(s->gh, 3, s->interp_operator, s->interp_params, + s->coord_system, NB_COLLOC_POINTS(ctx), CCTK_VARIABLE_REAL, + (const void * const *)eq_ctx->interp_coords, ARRAY_ELEMS(s->interp_vars_indices), s->interp_vars_indices, + ARRAY_ELEMS(eq_ctx->interp_values), s->interp_value_codes, (void * const *)eq_ctx->interp_values); + if (ret < 0) + CCTK_WARN(0, "Error interpolating"); + } + + return 0; +} + +#if 0 +#define EQUATION 0 +#include "md_solve_template.c" +#undef EQUATION + +#define EQUATION 1 +#include "md_solve_template.c" +#undef EQUATION +#else +#define EQUATION 0 +#include "gamma_freeze_template.c" +#undef EQUATION + +#define EQUATION 1 +#include "gamma_freeze_template.c" +#undef EQUATION +#endif + +static void (*calc_eq_coeffs[2])(void *, unsigned int, unsigned int, + unsigned int, unsigned int) = { + calc_eq_coeffs_0, + calc_eq_coeffs_1, +}; + +int md_solver_solve(MDSolver *ctx) +{ + MDSolverPriv *s = ctx->priv; + const double *(*eq_coeffs[2])[PSSOLVE_DIFF_ORDER_NB]; + int ret; + int64_t start, totaltime_start; + + totaltime_start = gettime(); + + /* interpolate the metric values and construct the quantities we'll need */ + CCTK_TimerStart("MinimalDistortion_interp_geometry"); + start = gettime(); + + ret = interp_geometry(ctx); + + s->interp_geometry_time += gettime() - start; + s->interp_geometry_count++; + CCTK_TimerStop("MinimalDistortion_interp_geometry"); + if (ret < 0) + return ret; + + CCTK_TimerStart("MinimalDistortion_calc_eq_coeffs"); + start = gettime(); + + for (int i = 0; i < ctx->nb_equations; i++) { + MDCalcEqThread thread = { + .ctx = ctx, + .eq_ctx = &s->eqs[i], + .block_size = 256, + }; + + md_threadpool_execute(s->tp, (NB_COLLOC_POINTS(ctx) + thread.block_size - 1) / thread.block_size, + calc_eq_coeffs[i], &thread); + } + + eq_coeffs[0] = s->eqs[0].eq_coeffs; + eq_coeffs[1] = s->eqs[1].eq_coeffs; + + s->calc_eq_coeffs_time += gettime() - start; + s->calc_eq_coeffs_count++; + CCTK_TimerStop("MinimalDistortion_calc_eq_coeffs"); + if (ret < 0) + return ret; + + ret = md_pssolve_solve(s->ps_ctx, + eq_coeffs, + s->rhs, ctx->coeffs); + if (ret < 0) + return ret; + + //for (int i = 0; i < ctx->nb_equations * NB_COEFFS(ctx); i++) + // ctx->coeffs[i] *= s->coeff_scale[i]; + + s->solve_count++; + s->solve_time += gettime() - totaltime_start; + + return 0; +} + +void md_solver_print_stats(MDSolver *ctx) +{ + MDSolverPriv *s = ctx->priv; + + fprintf(stderr, + "%g%% interpolate geometry: %lu, " + "total time %g s, avg time per call %g ms\n", + (double)s->interp_geometry_time * 100 / s->solve_time, + s->interp_geometry_count, (double)s->interp_geometry_time / 1e6, + (double)s->interp_geometry_time / s->interp_geometry_count / 1e3); + fprintf(stderr, + "%g%% calc equation coefficients: %lu, " + "total time %g s, avg time per call %g ms\n", + (double)s->calc_eq_coeffs_time * 100 / s->solve_time, + s->calc_eq_coeffs_count, (double)s->calc_eq_coeffs_time / 1e6, + (double)s->calc_eq_coeffs_time / s->calc_eq_coeffs_count / 1e3); + fprintf(stderr, + "%g%% pseudospectral matrix construction: %lu, " + "total time %g s, avg time per call %g ms\n", + (double)s->ps_ctx->construct_matrix_time * 100 / s->solve_time, + s->ps_ctx->construct_matrix_count, (double)s->ps_ctx->construct_matrix_time / 1e6, + (double)s->ps_ctx->construct_matrix_time / s->ps_ctx->construct_matrix_count / 1e3); + fprintf(stderr, + "%g%% BiCGSTAB %lu solves, " + "%lu iterations, total time %g s, " + "avg iterations per solve %g, avg time per solve %g ms, " + "avg time per iteration %g ms\n", + (double)s->ps_ctx->cg_time_total * 100 / s->solve_time, + s->ps_ctx->cg_solve_count, s->ps_ctx->cg_iter_count, (double)s->ps_ctx->cg_time_total / 1e6, + (double)s->ps_ctx->cg_iter_count / s->ps_ctx->cg_solve_count, + (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_solve_count / 1e3, + (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_iter_count / 1e3); + fprintf(stderr, + "%g%% LU %lu solves, total time %g s, avg time per solve %g ms\n", + (double)s->ps_ctx->lu_solves_time * 100 / s->solve_time, + s->ps_ctx->lu_solves_count, (double)s->ps_ctx->lu_solves_time / 1e6, + (double)s->ps_ctx->lu_solves_time / s->ps_ctx->lu_solves_count / 1e3); +} + +static void init_opencl(MDSolver *ctx) +#if HAVE_OPENCL +{ + MDSolverPriv *s = ctx->priv; + int err, count; + cl_platform_id platform; + cl_context_properties props[3]; + cl_device_id ocl_device; + + err = clGetPlatformIDs(1, &platform, &count); + if (err != CL_SUCCESS || count < 1) { + fprintf(stderr, "Could not get an OpenCL platform ID\n"); + return; + } + + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ocl_device, &count); + if (err != CL_SUCCESS || count < 1) { + fprintf(stderr, "Could not get an OpenCL device ID\n"); + return; + } + + props[0] = CL_CONTEXT_PLATFORM; + props[1] = (cl_context_properties)platform; + props[2] = 0; + + s->ocl_ctx = clCreateContext(props, 1, &ocl_device, NULL, NULL, &err); + if (err != CL_SUCCESS || !s->ocl_ctx) { + fprintf(stderr, "Could not create an OpenCL context\n"); + return; + } + + s->ocl_queue = clCreateCommandQueue(s->ocl_ctx, ocl_device, 0, &err); + if (err != CL_SUCCESS || !s->ocl_queue) { + fprintf(stderr, "Could not create an OpenCL command queue: %d\n", err); + goto fail; + } + + err = clblasSetup(); + if (err != CL_SUCCESS) { + fprintf(stderr, "Error setting up clBLAS\n"); + goto fail; + } + + return; +fail: + if (s->ocl_queue) + clReleaseCommandQueue(s->ocl_queue); + s->ocl_queue = 0; + + if (s->ocl_ctx) + clReleaseContext(s->ocl_ctx); + s->ocl_ctx = 0; +} +#else +{ +} +#endif + +static int eq_init(MDSolver *ctx, unsigned int eq_idx) +{ + MDSolverPriv *s = ctx->priv; + MDEquationContext *eq_ctx = &s->eqs[eq_idx]; + double *colloc_grid[2] = { s->ps_ctx->colloc_grid[eq_idx][0], + s->ps_ctx->colloc_grid[eq_idx][1] }; + int ret; + + /* prepare the state for the cactus interpolator */ + for (int i = 0; i < ARRAY_ELEMS(eq_ctx->interp_coords); i++) { + ret = posix_memalign((void**)&eq_ctx->interp_coords[i], 32, + NB_COLLOC_POINTS(ctx) * sizeof(*eq_ctx->interp_coords[i])); + if (ret) + return -ENOMEM; + } + + for (int j = 0; j < ctx->nb_colloc_points[1]; j++) { + for (int i = 0; i < ctx->nb_colloc_points[0]; i++) { + eq_ctx->interp_coords[0][j * ctx->nb_colloc_points[0] + i] = colloc_grid[0][i]; + eq_ctx->interp_coords[1][j * ctx->nb_colloc_points[0] + i] = 0; + eq_ctx->interp_coords[2][j * ctx->nb_colloc_points[0] + i] = colloc_grid[1][j]; + } + } + + for (int i = 0; i < ARRAY_ELEMS(eq_ctx->interp_values); i++) { + ret = posix_memalign((void**)&eq_ctx->interp_values[i], 32, + NB_COLLOC_POINTS(ctx) * sizeof(*eq_ctx->interp_values[i])); + if (ret) + return -ENOMEM; + } + + /* allocate the equation coefficients */ + eq_ctx->eq_coeffs = calloc(ctx->nb_equations, sizeof(*eq_ctx->eq_coeffs)); + if (!eq_ctx->eq_coeffs) + return -ENOMEM; + for (int i = 0; i < ctx->nb_equations; i++) + for (int j = 0; j < ARRAY_ELEMS(eq_ctx->eq_coeffs[i]); j++) { + ret = posix_memalign((void**)&eq_ctx->eq_coeffs[i][j], 32, + NB_COLLOC_POINTS(ctx) * sizeof(*eq_ctx->eq_coeffs[i][j])); + if (ret) + return -ENOMEM; + } + + /* setup the RHS pointer */ + if (eq_idx == 0) + eq_ctx->rhs = s->rhs; + else + eq_ctx->rhs = s->eqs[eq_idx - 1].rhs + NB_COLLOC_POINTS(ctx); + + return 0; +} + +static const enum MDBasisFamily basis_sets[2][2] = { + { MD_BASIS_FAMILY_SB_ODD, MD_BASIS_FAMILY_SB_EVEN }, + { MD_BASIS_FAMILY_SB_EVEN, MD_BASIS_FAMILY_SB_ODD }, +}; + +int md_solver_init(MDSolver **pctx, + cGH *cctkGH, ThreadPoolContext *tp, + unsigned int nb_equations, + unsigned int (*basis_order)[2], + double sf, double filter_power, double input_filter_power) +{ + MDSolver *ctx; + MDSolverPriv *s; + int max_order = 0; + int ret; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return -ENOMEM; + + ctx->priv = calloc(1, sizeof(*ctx->priv)); + if (!ctx->priv) + goto fail; + s = ctx->priv; + + s->gh = cctkGH; + + if (tp) { + s->tp = tp; + } else { + ret = md_threadpool_init(&s->tp_internal, 1); + if (ret < 0) + goto fail; + s->tp = s->tp_internal; + } + + s->eqs = calloc(nb_equations, sizeof(*s->eqs)); + if (!s->eqs) + goto fail; + ctx->nb_equations = nb_equations; + + ctx->nb_coeffs[0] = basis_order[0][0]; + ctx->nb_coeffs[1] = basis_order[0][1]; + + ctx->nb_colloc_points[0] = basis_order[0][0]; + ctx->nb_colloc_points[1] = basis_order[0][1]; + + if (NB_COLLOC_POINTS(ctx) != NB_COEFFS(ctx)) + CCTK_WARN(0, "Non-square collocation matrix"); + + s->colloc_grid_order[0] = ctx->nb_colloc_points[0]; + s->colloc_grid_order[1] = ctx->nb_colloc_points[1]; + + ret = posix_memalign((void**)&ctx->coeffs, 32, sizeof(*ctx->coeffs) * nb_equations * NB_COEFFS(ctx)); + ret |= posix_memalign((void**)&s->rhs, 32, sizeof(*s->rhs) * nb_equations * NB_COLLOC_POINTS(ctx)); + if (ret) + goto fail; + + for (int i = 0; i < ctx->nb_equations; i++) + for (int j = 0; j < 2; j++) { + double sf; + + ret = md_basis_init(&ctx->basis[i][j], basis_sets[i][j], 1.0); + if (ret < 0) + goto fail; + + sf = 64.0 / md_basis_colloc_point(ctx->basis[i][j], s->colloc_grid_order[j], + ctx->nb_colloc_points[j] - 1); + md_basis_free(&ctx->basis[i][j]); + + ret = md_basis_init(&ctx->basis[i][j], basis_sets[i][j], sf); + if (ret < 0) + goto fail; + } + + init_opencl(ctx); + + ret = md_pssolve_context_alloc(&s->ps_ctx, 2); + if (ret < 0) + CCTK_WARN(0, "Error allocating the pseudospectral solver"); + + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) { + s->ps_ctx->basis[i][j] = ctx->basis[i][j]; + s->ps_ctx->solve_order[i][j] = basis_order[i][j]; + max_order = MAX(max_order, basis_order[i][j]); + } + + s->ps_ctx->tp = s->tp; + +#if HAVE_OPENCL + s->ps_ctx->ocl_ctx = s->ocl_ctx; + s->ps_ctx->ocl_queue = s->ocl_queue; +#endif + + ret = md_pssolve_context_init(s->ps_ctx); + if (ret < 0) + CCTK_WARN(0, "Error initializing the pseudospectral solver"); + + for (int i = 0; i < max_order; i++) { + fprintf(stderr, "%d ", i); + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) { + if (i < s->ps_ctx->solve_order[j][k]) + fprintf(stderr, "%8.8g\t", s->ps_ctx->colloc_grid[j][k][i]); + else + fprintf(stderr, " "); + } + fprintf(stderr, "\n"); + } + + /* init the per-equation state */ + for (int i = 0; i < ctx->nb_equations; i++) { + ret = eq_init(ctx, i); + if (ret < 0) + goto fail; + } + + ret = posix_memalign((void**)&s->coeff_scale, 32, 2 * NB_COEFFS(ctx) * sizeof(*s->coeff_scale)); + if (ret) + goto fail; + for (int j = 0; j < ctx->nb_coeffs[1]; j++) + for (int i = 0; i < ctx->nb_coeffs[0]; i++) { + s->coeff_scale[j * ctx->nb_coeffs[0] + i] = exp(-36.0 * pow((double)i / ctx->nb_coeffs[0], filter_power)) * + exp(-36.0 * pow((double)j / ctx->nb_coeffs[1], filter_power)); + s->coeff_scale[NB_COEFFS(ctx) + j * ctx->nb_coeffs[0] + i] = exp(-36.0 * pow((double)i / ctx->nb_coeffs[0], filter_power)) * + exp(-36.0 * pow((double)j / ctx->nb_coeffs[1], filter_power)); + } + + for (int i = 0; i < ARRAY_ELEMS(s->interp_values); i++) { +#if 0 + ret = posix_memalign((void**)&s->interp_values[i], 32, + 2 * NB_COLLOC_POINTS(ctx) * sizeof(*s->interp_values[i])); + if (ret) + goto fail; +#endif + s->interp_value_codes[i] = CCTK_VARIABLE_REAL; + } + + for (int i = 0; i < ARRAY_ELEMS(metric_vars); i++) { + s->interp_vars_indices[i] = CCTK_VarIndex(metric_vars[i]); + if (s->interp_vars_indices[i] < 0) + CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING, "Error getting the index of variable: %s\n", metric_vars[i]); + } + + s->coord_system = CCTK_CoordSystemHandle("cart3d"); + if (s->coord_system < 0) + CCTK_WARN(0, "Error getting the coordinate system"); + + s->interp_operator = CCTK_InterpHandle("Lagrange polynomial interpolation (tensor product)"); + if (s->interp_operator < 0) + CCTK_WARN(0, "Error getting the interpolation operator"); + + s->interp_params = Util_TableCreateFromString("order=4 want_global_mode=1"); + if (s->interp_params < 0) + CCTK_WARN(0, "Error creating interpolation parameters table"); + + ret = Util_TableSetIntArray(s->interp_params, NB_INTERP_VARS, + interp_operation_codes, "operation_codes"); + if (ret < 0) + CCTK_WARN(0, "Error setting operation codes"); + + ret = Util_TableSetIntArray(s->interp_params, NB_INTERP_VARS, + interp_operation_indices, "operand_indices"); + if (ret < 0) + CCTK_WARN(0, "Error setting operand indices"); + + CCTK_TimerCreate("MinimalDistortion_Solve"); + CCTK_TimerCreate("MinimalDistortion_Expand"); + CCTK_TimerCreate("MinimalDistortion_interp_geometry"); + CCTK_TimerCreate("MinimalDistortion_calc_eq_coeffs"); + CCTK_TimerCreate("MinimalDistortion_construct_matrix"); + CCTK_TimerCreate("MinimalDistortion_solve_LU"); + CCTK_TimerCreate("MinimalDistortion_solve_BiCGSTAB"); + + *pctx = ctx; + return 0; +fail: + md_solver_free(&ctx); + return -ENOMEM; +} + +void md_solver_free(MDSolver **pctx) +{ + MDSolver *ctx = *pctx; + + if (!ctx) + return; + + if (ctx->priv) { + for (int i = 0; i < ARRAY_ELEMS(ctx->priv->interp_coords); i++) + free(ctx->priv->interp_coords[i]); + for (int i = 0; i < ARRAY_ELEMS(ctx->priv->interp_values); i++) + free(ctx->priv->interp_values[i]); + free(ctx->priv->rhs); + free(ctx->priv->coeff_scale); + + for (int i = 0; i < ctx->nb_equations; i++) { + MDEquationContext *eq_ctx = &ctx->priv->eqs[i]; + for (int j = 0; j < ARRAY_ELEMS(eq_ctx->interp_coords); j++) + free(eq_ctx->interp_coords[j]); + for (int j = 0; j < ARRAY_ELEMS(eq_ctx->interp_values); j++) + free(eq_ctx->interp_values[j]); + + if (eq_ctx->eq_coeffs) { + for (int j = 0; j < ctx->nb_equations; j++) + for (int k = 0; k < ARRAY_ELEMS(eq_ctx->eq_coeffs[j]); k++) + free(eq_ctx->eq_coeffs[j][k]); + } + free(eq_ctx->eq_coeffs); + } + free(ctx->priv->eqs); + + md_pssolve_context_free(&ctx->priv->ps_ctx); + + md_threadpool_free(&ctx->priv->tp_internal); + +#if HAVE_OPENCL + if (ctx->priv->ocl_queue) + clReleaseCommandQueue(ctx->priv->ocl_queue); + if (ctx->priv->ocl_ctx) + clReleaseContext(ctx->priv->ocl_ctx); +#endif + } + + free(ctx->priv); + + free(ctx->coeffs); + + free(ctx); + *pctx = NULL; +} |