summaryrefslogtreecommitdiff
path: root/src/qms.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/qms.c')
-rw-r--r--src/qms.c899
1 files changed, 171 insertions, 728 deletions
diff --git a/src/qms.c b/src/qms.c
index 7332017..95c645c 100644
--- a/src/qms.c
+++ b/src/qms.c
@@ -1,3 +1,5 @@
+#include "common.h"
+
#include <ctype.h>
#include <errno.h>
#include <float.h>
@@ -9,10 +11,6 @@
#include <string.h>
#include <cblas.h>
-#include <lapacke.h>
-
-#include <cl.h>
-#include <clBLAS.h>
#include "cctk.h"
#include "cctk_Arguments.h"
@@ -21,588 +19,19 @@
#include "util_Table.h"
#include "qms.h"
-
-#define ACC_TEST 0
+#include "qms_solve.h"
double scale_factor;
-/* mapping between our indices and thorn names */
-static const char *metric_vars[] = {
-#if CCZ4
- [GTXX] = "ML_CCZ4::gt11",
- [GTYY] = "ML_CCZ4::gt22",
- [GTZZ] = "ML_CCZ4::gt33",
- [GTXY] = "ML_CCZ4::gt12",
- [GTXZ] = "ML_CCZ4::gt13",
- [GTYZ] = "ML_CCZ4::gt23",
- [ATXX] = "ML_CCZ4::At11",
- [ATYY] = "ML_CCZ4::At22",
- [ATZZ] = "ML_CCZ4::At33",
- [ATXY] = "ML_CCZ4::At12",
- [ATXZ] = "ML_CCZ4::At13",
- [ATYZ] = "ML_CCZ4::At23",
- [PHI] = "ML_CCZ4::phi",
- [K] = "ML_CCZ4::trK",
- [XTX] = "ML_CCZ4::Xt1",
- [XTY] = "ML_CCZ4::Xt2",
- [XTZ] = "ML_CCZ4::Xt3",
- [BETAX] = "ML_CCZ4::beta1",
- [BETAY] = "ML_CCZ4::beta2",
- [BETAZ] = "ML_CCZ4::beta3",
- [ALPHA] = "ML_CCZ4::alpha",
- [KDOT_XX] = "ML_CCZ4::Kdot11",
- [KDOT_YY] = "ML_CCZ4::Kdot22",
- [KDOT_ZZ] = "ML_CCZ4::Kdot33",
- [KDOT_XY] = "ML_CCZ4::Kdot12",
- [KDOT_XZ] = "ML_CCZ4::Kdot13",
- [KDOT_YZ] = "ML_CCZ4::Kdot23",
- [XTDOT_X] = "ML_CCZ4::Xtdot1",
- [XTDOT_Y] = "ML_CCZ4::Xtdot2",
- [XTDOT_Z] = "ML_CCZ4::Xtdot3",
- [PHIDOT] = "ML_CCZ4::phidot",
-#else
- [GTXX] = "ML_BSSN::gt11",
- [GTYY] = "ML_BSSN::gt22",
- [GTZZ] = "ML_BSSN::gt33",
- [GTXY] = "ML_BSSN::gt12",
- [GTXZ] = "ML_BSSN::gt13",
- [GTYZ] = "ML_BSSN::gt23",
- [ATXX] = "ML_BSSN::At11",
- [ATYY] = "ML_BSSN::At22",
- [ATZZ] = "ML_BSSN::At33",
- [ATXY] = "ML_BSSN::At12",
- [ATXZ] = "ML_BSSN::At13",
- [ATYZ] = "ML_BSSN::At23",
- [PHI] = "ML_BSSN::phi",
- [K] = "ML_BSSN::trK",
- [XTX] = "ML_BSSN::Xt1",
- [XTY] = "ML_BSSN::Xt2",
- [XTZ] = "ML_BSSN::Xt3",
- [BETAX] = "ML_BSSN::beta1",
- [BETAY] = "ML_BSSN::beta2",
- [BETAZ] = "ML_BSSN::beta3",
- [ALPHA] = "ML_BSSN::alpha",
- //[ALPHA] = "ADMBase::alp",
- [KDOT_XX] = "ML_BSSN::Kdot11",
- [KDOT_YY] = "ML_BSSN::Kdot22",
- [KDOT_ZZ] = "ML_BSSN::Kdot33",
- [KDOT_XY] = "ML_BSSN::Kdot12",
- [KDOT_XZ] = "ML_BSSN::Kdot13",
- [KDOT_YZ] = "ML_BSSN::Kdot23",
- [XTDOT_X] = "ML_BSSN::Xtdot1",
- [XTDOT_Y] = "ML_BSSN::Xtdot2",
- [XTDOT_Z] = "ML_BSSN::Xtdot3",
- [PHIDOT] = "ML_BSSN::phidot",
-#endif
-};
-
-/* mapping between the cactus grid values and interpolated values */
-static const CCTK_INT interp_operation_indices[] = {
- [I_GTXX] = GTXX,
- [I_GTYY] = GTYY,
- [I_GTZZ] = GTZZ,
- [I_GTXY] = GTXY,
- [I_GTXZ] = GTXZ,
- [I_GTYZ] = GTYZ,
- [I_PHI] = PHI,
- [I_PHI_DX] = PHI,
- [I_PHI_DY] = PHI,
- [I_PHI_DZ] = PHI,
- [I_ATXX] = ATXX,
- [I_ATYY] = ATYY,
- [I_ATZZ] = ATZZ,
- [I_ATXY] = ATXY,
- [I_ATXZ] = ATXZ,
- [I_ATYZ] = ATYZ,
- [I_K] = K,
- [I_K_DX] = K,
- [I_K_DY] = K,
- [I_K_DZ] = K,
- [I_XTX] = XTX,
- [I_XTY] = XTY,
- [I_XTZ] = XTZ,
- [I_BETAX] = BETAX,
- [I_BETAY] = BETAY,
- [I_BETAZ] = BETAZ,
- [I_ALPHA] = ALPHA,
- [I_ALPHA_DX] = ALPHA,
- [I_ALPHA_DY] = ALPHA,
- [I_ALPHA_DZ] = ALPHA,
- [I_ALPHA_DXX] = ALPHA,
- [I_ALPHA_DYY] = ALPHA,
- [I_ALPHA_DZZ] = ALPHA,
- [I_ALPHA_DXY] = ALPHA,
- [I_ALPHA_DXZ] = ALPHA,
- [I_ALPHA_DYZ] = ALPHA,
- [I_KDOT_XX] = KDOT_XX,
- [I_KDOT_YY] = KDOT_YY,
- [I_KDOT_ZZ] = KDOT_ZZ,
- [I_KDOT_XY] = KDOT_XY,
- [I_KDOT_XZ] = KDOT_XZ,
- [I_KDOT_YZ] = KDOT_YZ,
- [I_XTDOT_X] = XTDOT_X,
- [I_XTDOT_Y] = XTDOT_Y,
- [I_XTDOT_Z] = XTDOT_Z,
- [I_PHIDOT] = PHIDOT,
- [I_PHIDOT_DX] = PHIDOT,
- [I_PHIDOT_DY] = PHIDOT,
- [I_PHIDOT_DZ] = PHIDOT,
-};
-
-/* the operation (plain value or x/y/z-derivative) to apply during interpolation */
-static const CCTK_INT interp_operation_codes[] = {
- [I_GTXX] = 0,
- [I_GTYY] = 0,
- [I_GTZZ] = 0,
- [I_GTXY] = 0,
- [I_GTXZ] = 0,
- [I_GTYZ] = 0,
- [I_PHI] = 0,
- [I_PHI_DX] = 1,
- [I_PHI_DY] = 2,
- [I_PHI_DZ] = 3,
- [I_ATXX] = 0,
- [I_ATYY] = 0,
- [I_ATZZ] = 0,
- [I_ATXY] = 0,
- [I_ATXZ] = 0,
- [I_ATYZ] = 0,
- [I_K] = 0,
- [I_K_DX] = 1,
- [I_K_DY] = 2,
- [I_K_DZ] = 3,
- [I_XTX] = 0,
- [I_XTY] = 0,
- [I_XTZ] = 0,
- [I_BETAX] = 0,
- [I_BETAY] = 0,
- [I_BETAZ] = 0,
- [I_ALPHA] = 0,
- [I_ALPHA_DX] = 1,
- [I_ALPHA_DY] = 2,
- [I_ALPHA_DZ] = 3,
- [I_ALPHA_DXX] = 11,
- [I_ALPHA_DYY] = 22,
- [I_ALPHA_DZZ] = 33,
- [I_ALPHA_DXY] = 12,
- [I_ALPHA_DXZ] = 13,
- [I_ALPHA_DYZ] = 23,
- [I_KDOT_XX] = 0,
- [I_KDOT_YY] = 0,
- [I_KDOT_ZZ] = 0,
- [I_KDOT_XY] = 0,
- [I_KDOT_XZ] = 0,
- [I_KDOT_YZ] = 0,
- [I_XTDOT_X] = 0,
- [I_XTDOT_Y] = 0,
- [I_XTDOT_Z] = 0,
- [I_PHIDOT] = 0,
- [I_PHIDOT_DX] = 1,
- [I_PHIDOT_DY] = 2,
- [I_PHIDOT_DZ] = 3,
-};
-
-static void init_opencl(MaximalSlicingContext *ms)
-{
- int err, count;
- cl_platform_id platform;
- cl_context_properties props[3];
-
- err = clGetPlatformIDs(1, &platform, &count);
- if (err != CL_SUCCESS || count < 1) {
- fprintf(stderr, "Could not get an OpenCL platform ID\n");
- return;
- }
-
- err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ms->ocl_device, &count);
- if (err != CL_SUCCESS || count < 1) {
- fprintf(stderr, "Could not get an OpenCL device ID\n");
- return;
- }
-
- props[0] = CL_CONTEXT_PLATFORM;
- props[1] = (cl_context_properties)platform;
- props[2] = 0;
-
- ms->cl_ctx = clCreateContext(props, 1, &ms->ocl_device, NULL, NULL, &err);
- if (err != CL_SUCCESS || !ms->cl_ctx) {
- fprintf(stderr, "Could not create an OpenCL context\n");
- return;
- }
-
- ms->cl_queue = clCreateCommandQueue(ms->cl_ctx, ms->ocl_device, 0, &err);
- if (err != CL_SUCCESS || !ms->cl_queue) {
- fprintf(stderr, "Could not create an OpenCL command queue: %d\n", err);
- goto fail;
- }
-
- err = clblasSetup();
- if (err != CL_SUCCESS) {
- fprintf(stderr, "Error setting up clBLAS\n");
- goto fail;
- }
-
- ms->ocl_coeffs = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
-
- ms->bicgstab.cl_p = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_v = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_y = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_z = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_t = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_res = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_res0 = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_tmp = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_tmp1 = clCreateBuffer(ms->cl_ctx, 0, 2 * ms->nb_coeffs * sizeof(double), NULL, &err);
-
- ms->bicgstab.cl_k = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_mat = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err);
-
- ms->bicgstab.cl_rho = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
- ms->bicgstab.cl_alpha = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
- ms->bicgstab.cl_beta = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
- ms->bicgstab.cl_omega = clCreateBuffer(ms->cl_ctx, 0, 2 * sizeof(double), NULL, &err);
- ms->bicgstab.cl_omega1 = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
-
- return;
-fail:
- if (ms->cl_queue)
- clReleaseCommandQueue(ms->cl_queue);
- ms->cl_queue = 0;
-
- if (ms->cl_ctx)
- clReleaseContext(ms->cl_ctx);
- ms->cl_ctx = 0;
-}
-
-static void construct_filter_matrix(MaximalSlicingContext *ms, double filter_power)
+/* get an approximate "main" frequency component in a basis function */
+static double calc_basis_freq(const BasisSet *b, int order)
{
- char equed = 'N';
- double cond, ferr, berr, rpivot;
-
- double *m, *minv, *scale, *tmp;
- int *ipiv;
- int idx_coeff_x, idx_coeff_z, idx_grid_x, idx_grid_z;
- int N = ms->nb_coeffs;
-
- ms->input_filter = malloc(sizeof(*m) * N * N);
-
- m = malloc(sizeof(*m) * N * N);
- minv = malloc(sizeof(*m) * N * N);
- scale = malloc(sizeof(*m) * N * N);
- tmp = malloc(sizeof(*m) * N * N);
- ipiv = malloc(sizeof(*ipiv) * N);
-
-#define BASIS_X (ms->basis_x_val [idx_grid_x * ms->nb_coeffs_x + idx_coeff_x])
-#define BASIS_Z (ms->basis_z_val [idx_grid_z * ms->nb_coeffs_z + idx_coeff_z])
- for (idx_grid_z = 0; idx_grid_z < ms->nb_colloc_points_z - 0; idx_grid_z++) {
- for (idx_grid_x = 0; idx_grid_x < ms->nb_colloc_points_x - 0; idx_grid_x++) {
- int idx_grid = idx_grid_z * ms->nb_colloc_points_x + idx_grid_x;
-
- for (idx_coeff_z = 0; idx_coeff_z < ms->nb_coeffs_z; idx_coeff_z++)
- for (idx_coeff_x = 0; idx_coeff_x < ms->nb_coeffs_x; idx_coeff_x++) {
- const int idx_coeff = idx_coeff_z * ms->nb_coeffs_x + idx_coeff_x;
-
- minv[idx_grid + ms->nb_colloc_points * idx_coeff] = BASIS_X * BASIS_Z;
- scale[idx_grid + ms->nb_colloc_points * idx_coeff] = (idx_grid == idx_coeff) ?
- exp(-36.0 * pow((double)idx_grid_x / ms->nb_coeffs_x, filter_power)) *
- exp(-36.0 * pow((double)idx_grid_z / ms->nb_coeffs_z, filter_power)) : 0.0;
-
- scale[idx_grid + ms->nb_colloc_points * idx_coeff] = (idx_grid == idx_coeff) ? 1.0 : 0.0;
- //if (idx_coeff_z == idx_grid_z && idx_coeff_z == 0 && idx_grid_x == idx_coeff_x)
- // fprintf(stderr, "%d %g\n", idx_coeff_x, scale[idx_grid + ms->nb_colloc_points * idx_coeff]);
- }
- }
- }
-
- memcpy(m, minv, sizeof(*m) * N * N);
- LAPACKE_dgetrf(LAPACK_COL_MAJOR, N, N, m, N, ipiv);
- LAPACKE_dgetri(LAPACK_COL_MAJOR, N, m, N, ipiv);
-
- cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
- N, N, N, 1.0, scale, N, m, N, 0.0, tmp, N);
- cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
- N, N, N, 1.0, minv, N, tmp, N, 0.0, ms->input_filter, N);
-
- free(m);
- free(minv);
- free(scale);
- free(tmp);
- free(ipiv);
-}
-
-static MaximalSlicingContext *init_ms(cGH *cctkGH,
- int basis_order_r, int basis_order_z,
- double sf, double filter_power, double input_filter_power,
- CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z,
- const int grid_size[3])
-{
- MaximalSlicingContext *ms;
- int ret;
-
- ms = calloc(1, sizeof(*ms));
-
- ms->gh = cctkGH;
-
- ms->basis = &qms_sb_even_basis;
- //ms->basis = &qms_cheb_basis;
- //ms->basis = &qms_cheb_even_basis;
- //ms->basis = &qms_tl_basis;
-#if POLAR
- ms->basis1 = &qms_cos_even_basis;
-#else
- ms->basis1 = &qms_sb_even_basis;
-#endif
-
- ms->nb_coeffs_x = basis_order_r;
- ms->nb_coeffs_z = basis_order_z;
-
- ms->nb_coeffs = ms->nb_coeffs_x * ms->nb_coeffs_z;
-
- ms->nb_colloc_points_x = basis_order_r;
- ms->nb_colloc_points_z = basis_order_z;
-
- ms->nb_colloc_points = ms->nb_colloc_points_x * ms->nb_colloc_points_z;
-
- if (ms->nb_colloc_points != ms->nb_coeffs)
- CCTK_WARN(0, "Non-square collocation matrix");
-
- ms->colloc_grid_order_x = ms->nb_colloc_points_x;
- ms->colloc_grid_order_z = ms->nb_colloc_points_z;
-
- ms->mat = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points);
- ms->coeffs = malloc(sizeof(double) * ms->nb_coeffs);
- ms->rhs = malloc(sizeof(double) * ms->nb_colloc_points);
-
- ms->coeffs_eval = malloc(sizeof(double) * ms->nb_coeffs);
- for (int i = 0; i < ARRAY_ELEMS(ms->solution_cache); i++) {
- ms->solution_cache[i].coeffs = malloc(sizeof(double) * ms->nb_coeffs);
- if (!ms->solution_cache[i].coeffs)
- CCTK_WARN(0, "malloc failure");
- }
-
- ms->mat_f = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points);
- ms->ipiv = malloc(sizeof(*ms->ipiv) * ms->nb_coeffs);
-
-#if 1
- scale_factor = 1.0;
-
- //scale_factor = (x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] * 0.75) / ms->basis->colloc_point(ms->colloc_grid_order_x, ms->nb_colloc_points_x - 1);
- scale_factor = (64.0 / ms->basis->colloc_point(ms->colloc_grid_order_x, ms->nb_colloc_points_x - 1));
- //scale_factor = (x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)]);
- //scale_factor = x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] - 0.5;
- fprintf(stderr, "scale factor %16.16g\n", scale_factor);
-
-#else
- scale_factor = sf;
-#endif
-
- /* initialize the collocation grid */
- posix_memalign((void**)&ms->colloc_grid[0], 32, ms->nb_colloc_points_x * sizeof(*ms->colloc_grid[0]));
- posix_memalign((void**)&ms->colloc_grid[1], 32, ms->nb_colloc_points_z * sizeof(*ms->colloc_grid[1]));
-
- for (int i = 0; i < ms->nb_colloc_points_x; i++) {
- ms->colloc_grid[0][i] = ms->basis->colloc_point(ms->colloc_grid_order_x, i);
- fprintf(stderr, "%d %g\n", i, ms->colloc_grid[0][i]);
- }
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- ms->colloc_grid[1][i] = ms->basis1->colloc_point(ms->colloc_grid_order_z, i);
- fprintf(stderr, "%d %g\n", i, ms->colloc_grid[1][i] / (POLAR ? M_PI : 1.0));
- }
-
- /* precompute the basis values we will need */
- ms->basis_x_val = malloc(sizeof(*ms->basis_x_val) * ms->nb_colloc_points_x * ms->nb_coeffs_x);
- ms->basis_x_dval = malloc(sizeof(*ms->basis_x_dval) * ms->nb_colloc_points_x * ms->nb_coeffs_x);
- ms->basis_x_d2val = malloc(sizeof(*ms->basis_x_d2val) * ms->nb_colloc_points_x * ms->nb_coeffs_x);
- for (int i = 0; i < ms->nb_colloc_points_x; i++) {
- CCTK_REAL coord = ms->colloc_grid[0][i];
- for (int j = 0; j < ms->nb_coeffs_x; j++) {
- ms->basis_x_val [i * ms->nb_coeffs_x + j] = ms->basis->eval(coord, j);
- ms->basis_x_dval [i * ms->nb_coeffs_x + j] = ms->basis->eval_diff1(coord, j);
- ms->basis_x_d2val[i * ms->nb_coeffs_x + j] = ms->basis->eval_diff2(coord, j);
- }
- }
-
- ms->basis_z_val = malloc(sizeof(*ms->basis_z_val) * ms->nb_colloc_points_z * ms->nb_coeffs_z);
- ms->basis_z_dval = malloc(sizeof(*ms->basis_z_dval) * ms->nb_colloc_points_z * ms->nb_coeffs_z);
- ms->basis_z_d2val = malloc(sizeof(*ms->basis_z_d2val) * ms->nb_colloc_points_z * ms->nb_coeffs_z);
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- CCTK_REAL coord = ms->colloc_grid[1][i];
- for (int j = 0; j < ms->nb_coeffs_z; j++) {
- ms->basis_z_val [i * ms->nb_coeffs_z + j] = ms->basis1->eval(coord, j);
- ms->basis_z_dval [i * ms->nb_coeffs_z + j] = ms->basis1->eval_diff1(coord, j);
- ms->basis_z_d2val[i * ms->nb_coeffs_z + j] = ms->basis1->eval_diff2(coord, j);
- }
- }
-
- posix_memalign((void**)&ms->basis_val_00, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00));
- posix_memalign((void**)&ms->basis_val_11, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00));
- posix_memalign((void**)&ms->basis_val_10, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00));
- posix_memalign((void**)&ms->basis_val_01, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00));
- posix_memalign((void**)&ms->basis_val_02, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00));
- posix_memalign((void**)&ms->basis_val_20, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00));
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- const double *basis_z = ms->basis_z_val + i * ms->nb_coeffs_z;
- const double *dbasis_z = ms->basis_z_dval + i * ms->nb_coeffs_z;
- const double *d2basis_z = ms->basis_z_d2val + i * ms->nb_coeffs_z;
-
- for (int j = 0; j < ms->nb_colloc_points_x; j++) {
- const double *basis_x = ms->basis_x_val + j * ms->nb_coeffs_x;
- const double *dbasis_x = ms->basis_x_dval + j * ms->nb_coeffs_x;
- const double *d2basis_x = ms->basis_x_d2val + j * ms->nb_coeffs_x;
- const int idx_grid = i * ms->nb_colloc_points_x + j;
-
-#if POLAR
- double r = ms->colloc_grid[0][j];
- double theta = ms->colloc_grid[1][i];
-
- double x = r * cos(theta);
- double z = r * sin(theta);
-#else
- double x = ms->colloc_grid[0][j];
- double z = ms->colloc_grid[1][i];
-#endif
-
- for (int k = 0; k < ms->nb_coeffs_z; k++)
- for (int l = 0; l < ms->nb_coeffs_x; l++) {
- const int idx_coeff = k * ms->nb_coeffs_x + l;
- const int idx = idx_grid + ms->nb_colloc_points * idx_coeff;
- ms->basis_val_00[idx] = basis_x[l] * basis_z[k];
-#if POLAR
- ms->basis_val_10[idx] = ((r > EPS) ? (dbasis_x[l] * basis_z[k] * x / r - basis_x[l] * dbasis_z[k] * z / SQR(r)) : 0.0);
- ms->basis_val_01[idx] = ((r > EPS) ? (dbasis_x[l] * basis_z[k] * z / r + basis_x[l] * dbasis_z[k] * x / SQR(r)) : 0.0);
- ms->basis_val_20[idx] = ((r > EPS) ? (SQR(x / r) * d2basis_x[l] * basis_z[k] + SQR(z / SQR(r)) * basis_x[l] * d2basis_z[k]
- + (1.0 - SQR(x / r)) / r * dbasis_x[l] * basis_z[k]
- + 2 * x * z / SQR(SQR(r)) * basis_x[l] * dbasis_z[k]
- - 2 * z * x / (r * SQR(r)) * dbasis_x[l] * dbasis_z[k]) : 0.0);
- ms->basis_val_02[idx] = ((r > EPS) ? (SQR(z / r) * d2basis_x[l] * basis_z[k] + SQR(x / SQR(r)) * basis_x[l] * d2basis_z[k]
- + (1.0 - SQR(z / r)) / r * dbasis_x[l] * basis_z[k]
- - 2 * x * z / SQR(SQR(r)) * basis_x[l] * dbasis_z[k]
- + 2 * z * x / (r * SQR(r)) * dbasis_x[l] * dbasis_z[k]) : 0.0);
- ms->basis_val_11[idx] = ((r > EPS) ? (x * z / SQR(r) * d2basis_x[l] * basis_z[k] - x * z / SQR(SQR(r)) * basis_x[l] * d2basis_z[k]
- - x * z / (r * SQR(r)) * dbasis_x[l] * basis_z[k]
- - (1.0 - SQR(z / r)) / SQR(r) * basis_x[l] * dbasis_z[k]
- + (SQR(x) - SQR(z)) / (r * SQR(r)) * dbasis_x[l] * dbasis_z[k]) : 0.0);
-#else
- ms->basis_val_10[idx] = dbasis_x[l] * basis_z[k];
- ms->basis_val_01[idx] = basis_x[l] * dbasis_z[k];
- ms->basis_val_20[idx] = d2basis_x[l] * basis_z[k];
- ms->basis_val_02[idx] = basis_x[l] * d2basis_z[k];
- ms->basis_val_11[idx] = dbasis_x[l] * dbasis_z[k];
-#endif
- }
- }
- }
-
- posix_memalign((void**)&ms->eq_coeff_00, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00));
- posix_memalign((void**)&ms->eq_coeff_11, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00));
- posix_memalign((void**)&ms->eq_coeff_10, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00));
- posix_memalign((void**)&ms->eq_coeff_01, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00));
- posix_memalign((void**)&ms->eq_coeff_02, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00));
- posix_memalign((void**)&ms->eq_coeff_20, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00));
-
- ms->interp_coords[0] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[0]));
- ms->interp_coords[1] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[1]));
- ms->interp_coords[2] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[2]));
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- for (int j = 0; j < ms->nb_colloc_points_x; j++) {
-#if POLAR
- double phi = ms->colloc_grid[1][i];
- double r = ms->colloc_grid[0][j];
-
- double x = r * cos(phi);
- double z = r * sin(phi);
-#else
- double x = ms->colloc_grid[0][j];
- double z = ms->colloc_grid[1][i];
-#endif
-
- ms->interp_coords[0][i * ms->nb_colloc_points_x + j] = x;
- ms->interp_coords[1][i * ms->nb_colloc_points_x + j] = 0;
- ms->interp_coords[2][i * ms->nb_colloc_points_x + j] = z;
- }
- }
-
- for (int i = 0; i < ARRAY_ELEMS(ms->metric_u); i++)
- ms->metric_u[i] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_values[i]));
-
- ms->kij_kij = malloc(ms->nb_colloc_points * sizeof(*ms->kij_kij));
-
- ms->coeff_scale = malloc(ms->nb_coeffs * sizeof(double));
- for (int j = 0; j < ms->nb_coeffs_z; j++)
- for (int i = 0; i < ms->nb_coeffs_x; i++) {
- //ms->coeff_scale[j * ms->nb_coeffs_x + i] = 1.0;
- ms->coeff_scale[j * ms->nb_coeffs_x + i] = exp(-36.0 * pow((double)i / ms->nb_coeffs_x, filter_power)) *
- exp(-36.0 * pow((double)j / ms->nb_coeffs_z, filter_power));
- //ms->coeff_scale[j * ms->nb_coeffs_x + i] = ((i < (2.0 / 3.0) * ms->nb_coeffs_x) ? 1.0 : SQR(cos((((double)i / ms->nb_coeffs_x) - (2.0 / 3.0)) * 3.0 * M_PI / 2.0))) *
- // ((j < (2.0 / 3.0) * ms->nb_coeffs_z) ? 1.0 : SQR(cos((((double)j / ms->nb_coeffs_z) - (2.0 / 3.0)) * 3.0 * M_PI / 2.0)));
- }
-
- for (int i = 0; i < ARRAY_ELEMS(ms->interp_values); i++) {
- ms->interp_values[i] = malloc(sizeof(*ms->interp_values[i]) * ms->nb_colloc_points);
- ms->interp_values_prefilter[i] = malloc(sizeof(*ms->interp_values[i]) * ms->nb_colloc_points);
- if (!ms->interp_values[i] || !ms->interp_values_prefilter[i])
- CCTK_WARN(0, "Malloc failure");
- ms->interp_value_codes[i] = CCTK_VARIABLE_REAL;
- }
-
- for (int i = 0; i < ARRAY_ELEMS(metric_vars); i++) {
- ms->interp_vars_indices[i] = CCTK_VarIndex(metric_vars[i]);
- if (ms->interp_vars_indices[i] < 0)
- CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING, "Error getting the index of variable: %s\n", metric_vars[i]);
- }
-
- ms->coord_system = CCTK_CoordSystemHandle("cart3d");
- if (ms->coord_system < 0)
- CCTK_WARN(0, "Error getting the coordinate system");
-
- ms->interp_operator = CCTK_InterpHandle("Lagrange polynomial interpolation (tensor product)");
- if (ms->interp_operator < 0)
- CCTK_WARN(0, "Error getting the interpolation operator");
-
- ms->interp_params = Util_TableCreateFromString("order=4 want_global_mode=1");
- if (ms->interp_params < 0)
- CCTK_WARN(0, "Error creating interpolation parameters table");
-
- ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS,
- interp_operation_codes, "operation_codes");
- if (ret < 0)
- CCTK_WARN(0, "Error setting operation codes");
-
- ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS,
- interp_operation_indices, "operand_indices");
- if (ret < 0)
- CCTK_WARN(0, "Error setting operand indices");
-
- ms->bicgstab.p = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.v = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.y = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.z = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.t = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.res = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.res0 = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.k = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points);
-
- ms->steps_since_inverse = INT_MAX;
-
- init_opencl(ms);
-
- construct_filter_matrix(ms, input_filter_power);
-
- CCTK_TimerCreate("MaximalSlicingAxi_Solve");
- CCTK_TimerCreate("MaximalSlicingAxi_Expand");
- CCTK_TimerCreate("MaximalSlicingAxi_interp_geometry");
- CCTK_TimerCreate("MaximalSlicingAxi_calc_eq_coeffs");
- CCTK_TimerCreate("MaximalSlicingAxi_construct_matrix");
- CCTK_TimerCreate("MaximalSlicingAxi_filter_input");
- CCTK_TimerCreate("MaximalSlicingAxi_solve_LU");
- CCTK_TimerCreate("MaximalSlicingAxi_solve_BiCGSTAB");
- CCTK_TimerCreate("MaximalSlicingAxi_Polish");
-
- return ms;
+ return b->colloc_point(order, 1);
}
-static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
- CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z)
+static CoordPatch *get_coord_patch(QMSContext *ms,
+ CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z,
+ double scale_factor, double scale_power)
{
cGH *cctkGH = ms->gh;
@@ -645,48 +74,9 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
if (i == cp->size[1])
CCTK_WARN(0, "The grid does not include y==0");
-#if 0
- posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]);
- for (int j = 0; j < ms->gh->cctk_lsh[1]; j++)
- for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) {
- CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, j, 0)];
- CCTK_REAL yy = y[CCTK_GFINDEX3D(ms->gh, i, j, 0)];
- CCTK_REAL r = sqrt(SQR(xx) + SQR(yy));
-
- for (int k = 0; k < ms->nb_coeffs_x; k++)
- //cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) * ms->nb_coeffs_x + k] = ms->basis->eval(r, k);
- cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) + ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0] * k] = ms->basis->eval(r, k);
- }
-
- posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]);
- for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) {
- CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)];
- for (int j = 0; j < ms->nb_coeffs_z; j++)
- cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j);
- //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j);
- }
- posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->nb_coeffs_x);
- posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size);
- for (int i = 0; i < grid_size; i++)
- cp->one[i] = 1.0;
-#else
- posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[0]);
- for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) {
- CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)];
-
- for (int k = 0; k < ms->nb_coeffs_x; k++)
- cp->basis_val_r[i * ms->nb_coeffs_x + k] = ms->basis->eval(fabs(xx), k);
- }
-
- posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]);
- for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) {
- CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)];
- for (int j = 0; j < ms->nb_coeffs_z; j++)
- cp->basis_val_z[i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j);
- }
-
- posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * ms->nb_coeffs_x * cp->size[0] * cp->size[2]);
- posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * ms->nb_coeffs_z * cp->size[0] * cp->size[2]);
+#if QMS_POLAR || 1
+ posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * ms->solver->nb_coeffs[0] * cp->size[0] * cp->size[2]);
+ posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * ms->solver->nb_coeffs[1] * cp->size[0] * cp->size[2]);
#pragma omp parallel for
for (int j = 0; j < cp->size[2]; j++) {
CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)];
@@ -694,10 +84,11 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
for (int i = 0; i < cp->size[0]; i++) {
const int idx_grid = j * cp->size[0] + i;
- CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)];
+ double xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)];
+ double rr = sqrt(SQR(xx) + SQR(zz));
-#if POLAR
- double coord0 = sqrt(SQR(xx) + SQR(zz));
+#if QMS_POLAR
+ double coord0 = rr;
double coord1 = atan2(zz, xx);
#else
double coord0 = xx;
@@ -709,51 +100,88 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
// const int idx_coeff = k * ms->nb_coeffs_x + l;
// cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * idx_coeff] = ms->basis->eval(r, l) * ms->basis1->eval(phi, k);
// }
- for (int k = 0; k < ms->nb_coeffs_x; k++)
- cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->basis->eval(coord0, k);
- for (int k = 0; k < ms->nb_coeffs_z; k++)
- cp->transform_matrix1[idx_grid * ms->nb_coeffs_z + k] = ms->basis1->eval(coord1, k);
+ for (int k = 0; k < ms->solver->nb_coeffs[0]; k++) {
+ double dx = calc_basis_freq(ms->solver->basis[0], k);
+ double r0 = dx * scale_factor;
+ double fact = exp(-36.0 * pow(rr / r0, scale_power));
+
+ cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->solver->basis[0]->eval(coord0, k) * fact;
+ }
+ for (int k = 0; k < ms->solver->nb_coeffs[1]; k++) {
+ double dx = calc_basis_freq(ms->solver->basis[1], k);
+ double r0 = dx * scale_factor;
+ double fact = exp(-36.0 * pow(rr / r0, scale_power));
+
+ cp->transform_matrix1[idx_grid * ms->solver->nb_coeffs[1] + k] = ms->solver->basis[1]->eval(coord1, k) * fact;
+ }
}
}
- posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * ms->nb_coeffs_z);
+ posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * ms->solver->nb_coeffs[1]);
+#else
+ posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->solver->nb_coeffs[0] * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]);
+ for (int j = 0; j < ms->gh->cctk_lsh[1]; j++)
+ for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) {
+ CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, j, 0)];
+ CCTK_REAL yy = y[CCTK_GFINDEX3D(ms->gh, i, j, 0)];
+ CCTK_REAL r = sqrt(SQR(xx) + SQR(yy));
+
+ for (int k = 0; k < ms->solver->nb_coeffs[0]; k++)
+ //cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) * ms->nb_coeffs_x + k] = ms->basis->eval(r, k);
+ cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) + ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0] * k] = ms->solver->basis[0]->eval(r, k);
+ }
+
+ posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->solver->nb_coeffs[1] * ms->gh->cctk_lsh[2]);
+ for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) {
+ CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)];
+ for (int j = 0; j < ms->solver->nb_coeffs[1]; j++)
+ cp->basis_val_z [i * ms->solver->nb_coeffs[1] + j] = ms->solver->basis[0]->eval(fabs(zz), j);
+ //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j);
+ }
+ posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->solver->nb_coeffs[0]);
+ posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size);
+ for (int i = 0; i < grid_size; i++)
+ cp->one[i] = 1.0;
+
+ posix_memalign((void**)&cp->w_scale, 32, sizeof(*cp->w_scale) * grid_size);
+ for (int k = 0; k < ms->gh->cctk_lsh[2]; k++)
+ for (int j = 0; j < ms->gh->cctk_lsh[1]; j++)
+ for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) {
+ int idx = CCTK_GFINDEX3D(ms->gh, i, j, k);
+ double r = sqrt(SQR(x[idx]) + SQR(y[idx]) + SQR(z[idx]));
+ const double R = 32.0;
+ const double width = 4.0;
+ cp->w_scale[idx] = (r > R) ? exp(-pow((r - R) / width, 4.0)) : 1.0;
+ }
#endif
ms->nb_patches++;
return cp;
}
-static MaximalSlicingContext *ms_context;
+static QMSContext *qms_context;
void quasimaximal_slicing_axi_solve(CCTK_ARGUMENTS)
{
- MaximalSlicingContext *ms;
+ QMSContext *ms;
DECLARE_CCTK_ARGUMENTS;
DECLARE_CCTK_PARAMETERS;
double time;
- if (!ms_context) {
- ms_context = init_ms(cctkGH, basis_order_r, basis_order_z,
- scale_factor, filter_power, input_filter_power, x, y, z, cctk_lsh);
- }
- ms = ms_context;
+ ms = qms_context;
time = cctkGH->cctk_time / ms->gh->cctk_delta_time;
- if (ms->gh->cctk_levfac[0] != 1 ||
- fabs(time - ceilf(time)) > 1e-8)
+ if (ms->gh->cctk_levfac[0] != 1 || fabs(time - ceilf(time)) > 1e-8 ||
+ (ms->nb_solutions && ms->solution_cache[ms->nb_solutions - 1].time == cctkGH->cctk_time))
return;
- fprintf(stderr, "qms solve: time %g %g\n", ms->gh->cctk_time, time);
-
- CCTK_TimerStart("MaximalSlicingAxi_Solve");
- qms_maximal_solve(ms);
- CCTK_TimerStop("MaximalSlicingAxi_Solve");
-
- if (export_coeffs)
- memcpy(w_coeffs, ms->coeffs, sizeof(*w_coeffs) * ms->nb_coeffs);
+ CCTK_TimerStart("QuasiMaximalSlicing_Solve");
+ qms_solver_solve(ms->solver);
+ CCTK_TimerStop("QuasiMaximalSlicing_Solve");
+ fprintf(stderr, "%d qms solve: time %g %g %g\n", ms->gh->cctk_levfac[0], ms->gh->cctk_time, time, ms->solver->coeffs[0]);
if (1) {
double *tmp;
if (ms->nb_solutions == ARRAY_ELEMS(ms->solution_cache)) {
@@ -763,45 +191,39 @@ void quasimaximal_slicing_axi_solve(CCTK_ARGUMENTS)
ms->nb_solutions++;
tmp = ms->solution_cache[ms->nb_solutions - 1].coeffs;
}
- ms->solution_cache[ms->nb_solutions - 1].coeffs = ms->coeffs;
+ ms->solution_cache[ms->nb_solutions - 1].coeffs = ms->solver->coeffs;
ms->solution_cache[ms->nb_solutions - 1].time = ms->gh->cctk_time;
- ms->coeffs = tmp;
+ ms->solver->coeffs = tmp;
}
}
void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS)
{
- MaximalSlicingContext *ms;
+ QMSContext *ms;
CoordPatch *cp;
DECLARE_CCTK_ARGUMENTS;
DECLARE_CCTK_PARAMETERS;
- int64_t expand_start, totaltime_start;
+ int64_t expand_start;
double *coeffs = NULL;
int i, ret;
- totaltime_start = gettime();
-
- /* on the first run, init the solver */
- if (!ms_context) {
- ms_context = init_ms(cctkGH, basis_order_r, basis_order_z,
- scale_factor, filter_power, input_filter_power, x, y, z, cctk_lsh);
- }
- ms = ms_context;
+ ms = qms_context;
- cp = get_coord_patch(ms, x, y, z);
+ cp = get_coord_patch(ms, x, y, z, scale_factor, scale_power);
#if 0
- coeffs = ms->coeffs;
+ //coeffs = ms->coeffs;
+ coeffs = ms->solution_cache[ms->nb_solutions - 1].coeffs;
#else
coeffs = ms->coeffs_eval;
- if (ms->nb_solutions < 2) {
- memset(coeffs, 0, sizeof(*coeffs) * ms->nb_coeffs);
+ if (cctkGH->cctk_levfac[0] < 1 || ms->nb_solutions < 2) {
+ memset(coeffs, 0, sizeof(*coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]);
//fprintf(stderr, "qms eval: time %g zero\n", ms->gh->cctk_time);
} else {
double *coeffs0 = ms->solution_cache[ms->nb_solutions - 2].coeffs;
@@ -810,16 +232,28 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS)
double time1 = ms->solution_cache[ms->nb_solutions - 1].time;
double time = ms->gh->cctk_time;
- //fprintf(stderr, "qms eval: time %g interp from %g %g\n", ms->gh->cctk_time, time0, time1);
+ //double fact;
- for (int i = 0; i < ms->nb_coeffs; i++)
+ //if (time > 2.0)
+ // fact = 1.0;
+ //else if (time < 0.1)
+ // fact = 0.0;
+ //else
+ // fact = (1.0 - exp(-pow((time - 0.0) / 0.25, 4.0)));
+ //fact = 1.0;
+
+ //fprintf(stderr, "qms eval: time %g interp from %g %g %g\n", ms->gh->cctk_time, time0, time1, fact);
+
+ for (int i = 0; i < ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]; i++)
coeffs[i] = coeffs1[i] * (time - time0) / (time1 - time0) + coeffs0[i] * (time - time1) / (time0 - time1);
}
#endif
+ if (export_coeffs)
+ memcpy(w_coeffs, coeffs, sizeof(*w_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]);
- CCTK_TimerStart("MaximalSlicingAxi_Expand");
+ CCTK_TimerStart("QuasiMaximalSlicing_Expand");
expand_start = gettime();
#if 0
#pragma omp parallel for
@@ -831,106 +265,115 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS)
double r = sqrt(SQR(xx) + SQR(zz));
double phi = atan2(zz, xx);
- double val = 1.0;
+ double val = 0.0;
for (int l = 0; l < ms->nb_coeffs_z; l++) {
double tmp = 0.0;
for (int m = 0; m < ms->nb_coeffs_x; m++) {
const int idx_coeff = l * ms->nb_coeffs_x + m;
- tmp += ms->coeffs[idx_coeff] * ms->basis->eval(r, m);
+ tmp += coeffs[idx_coeff] * ms->basis->eval(r, m);
}
val += tmp * ms->basis1->eval(phi, l);
}
- alp[idx] = val;
+ W[idx] = val;
}
}
-#else
+#elif QMS_POLAR || 1
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
- cctk_lsh[0] * cctk_lsh[2], ms->nb_coeffs_z, ms->nb_coeffs_x,
+ cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0],
1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2],
- coeffs, ms->nb_coeffs_x, 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]);
+ coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]);
#pragma omp parallel for
for (int j = 0; j < cctk_lsh[2]; j++)
for (int i = 0; i < cctk_lsh[0]; i++) {
const int idx_grid = j * cctk_lsh[0] + i;
- const double val = cblas_ddot(ms->nb_coeffs_z, cp->transform_matrix1 + idx_grid * ms->nb_coeffs_z, 1,
+ const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1,
cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]);
W[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = val;
}
-#endif
+#else
//memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp));
- //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp));
- //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
- // ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0,
- // coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z,
- // 0.0, cp->transform_z, ms->nb_coeffs_x);
- //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
- // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0,
- // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x,
- // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]);
+ memset(W, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*W));
+ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ ms->solver->nb_coeffs[0], cctk_lsh[2], ms->solver->nb_coeffs[1], 1.0,
+ coeffs, ms->solver->nb_coeffs[0], cp->basis_val_z, ms->solver->nb_coeffs[1],
+ 0.0, cp->transform_z, ms->solver->nb_coeffs[0]);
+ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->solver->nb_coeffs[0], 1.0,
+ cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->solver->nb_coeffs[0],
+ 1.0, W, cctk_lsh[0] * cctk_lsh[1]);
+
+// {
+// const int grid_size = cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2];
+//#pragma omp parallel for
+// for (int i = 0; i < grid_size; i++)
+// W[i] *= cp->w_scale[i];
+// }
+#endif
ms->grid_expand_time += gettime() - expand_start;
ms->grid_expand_count++;
- CCTK_TimerStop("MaximalSlicingAxi_Expand");
-
- ms->solve_time += gettime() - totaltime_start;
- ms->solve_count++;
+ CCTK_TimerStop("QuasiMaximalSlicing_Expand");
/* print stats */
- if (!(ms->solve_count & 255)) {
- fprintf(stderr,
- "maximal slicing solves: %lu, "
- "total time %g s, avg time per call %g ms\n",
- ms->solve_count, (double)ms->solve_time / 1e6,
- (double)ms->solve_time / ms->solve_count / 1e3);
- fprintf(stderr,
- "%g%% interpolate geometry: %lu, "
- "total time %g s, avg time per call %g ms\n",
- (double)ms->interp_geometry_time * 100 / ms->solve_time,
- ms->interp_geometry_count, (double)ms->interp_geometry_time / 1e6,
- (double)ms->interp_geometry_time / ms->interp_geometry_count / 1e3);
- fprintf(stderr,
- "%g%% calc equation coefficients: %lu, "
- "total time %g s, avg time per call %g ms\n",
- (double)ms->calc_eq_coeffs_time * 100 / ms->solve_time,
- ms->calc_eq_coeffs_count, (double)ms->calc_eq_coeffs_time / 1e6,
- (double)ms->calc_eq_coeffs_time / ms->calc_eq_coeffs_count / 1e3);
- fprintf(stderr,
- "%g%% pseudospectral matrix construction: %lu, "
- "total time %g s, avg time per call %g ms\n",
- (double)ms->construct_matrix_time * 100 / ms->solve_time,
- ms->construct_matrix_count, (double)ms->construct_matrix_time / 1e6,
- (double)ms->construct_matrix_time / ms->construct_matrix_count / 1e3);
- fprintf(stderr,
- "%g%% BiCGSTAB %lu solves, "
- "%lu iterations, total time %g s, "
- "avg iterations per solve %g, avg time per solve %g ms, "
- "avg time per iteration %g ms\n",
- (double)ms->cg_time_total * 100 / ms->solve_time,
- ms->cg_solve_count, ms->cg_iter_count, (double)ms->cg_time_total / 1e6,
- (double)ms->cg_iter_count / ms->cg_solve_count,
- (double)ms->cg_time_total / ms->cg_solve_count / 1e3,
- (double)ms->cg_time_total / ms->cg_iter_count / 1e3);
- fprintf(stderr,
- "%g%% LU %lu solves, total time %g s, avg time per solve %g ms\n",
- (double)ms->lu_solves_time * 100 / ms->solve_time,
- ms->lu_solves_count, (double)ms->lu_solves_time / 1e6,
- (double)ms->lu_solves_time / ms->lu_solves_count / 1e3);
+ if (!(ms->grid_expand_count & 255)) {
+ fprintf(stderr, "Quasi-maximal slicing stats:\n");
+
+ qms_solver_print_stats(ms->solver);
+
fprintf(stderr,
- "%g%% grid expansion: %lu, total time %g s, avg time per call %g ms\n",
- (double)ms->grid_expand_time * 100 / ms->solve_time,
+ "%lu evals: total time %g s, avg time per call %g ms\n",
ms->grid_expand_count, (double)ms->grid_expand_time / 1e6,
(double)ms->grid_expand_time / ms->grid_expand_count / 1e3);
}
}
+static int context_init(cGH *cctkGH)
+{
+ QMSContext *qms;
+ int ret;
+
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ qms = calloc(1, sizeof(*qms));
+ if (!qms)
+ return -ENOMEM;
+
+ qms->gh = cctkGH;
+
+ ret = qms_solver_init(&qms->solver, cctkGH, basis_order_r, basis_order_z,
+ scale_factor, filter_power, 0.0);
+ if (ret < 0)
+ return ret;
+
+ ret = posix_memalign((void**)&qms->coeffs_eval, 32,
+ basis_order_r * basis_order_z * sizeof(*qms->coeffs_eval));
+ if (ret)
+ return -ENOMEM;
+
+ for (int i = 0; i < ARRAY_ELEMS(qms->solution_cache); i++) {
+ ret = posix_memalign((void**)&qms->solution_cache[i].coeffs, 32,
+ basis_order_r * basis_order_z * sizeof(*qms->solution_cache[i].coeffs));
+ if (ret)
+ return -ENOMEM;
+ }
+
+ qms_context = qms;
+
+ return 0;
+}
+
void qms_init(CCTK_ARGUMENTS)
{
DECLARE_CCTK_ARGUMENTS;
DECLARE_CCTK_PARAMETERS;
+ if (!qms_context)
+ context_init(cctkGH);
+
double *Kdot11 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot11");
double *Kdot22 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot22");
double *Kdot33 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot33");