summaryrefslogtreecommitdiff
path: root/src/maximal_slicing_axi.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/maximal_slicing_axi.c')
-rw-r--r--src/maximal_slicing_axi.c631
1 files changed, 171 insertions, 460 deletions
diff --git a/src/maximal_slicing_axi.c b/src/maximal_slicing_axi.c
index 48bb4c9..e5b0d85 100644
--- a/src/maximal_slicing_axi.c
+++ b/src/maximal_slicing_axi.c
@@ -21,446 +21,26 @@
#include "util_Table.h"
#include "maximal_slicing_axi.h"
-
-#define ACC_TEST 0
-
-/*
- * The basis of even (n = 2 * idx) SB functions (Boyd 2000, Ch 17.9)
- * SB(x, n) = sin((n + 1) arccot(|x| / L))
- * They are symmetric wrt origin and decay as 1/x in infinity.
- */
+#include "ms_solve.h"
double scale_factor;
-
-/* mapping between our indices and thorn names */
-static const char *metric_vars[] = {
- [GTXX] = "ML_BSSN::gt11",
- [GTYY] = "ML_BSSN::gt22",
- [GTZZ] = "ML_BSSN::gt33",
- [GTXY] = "ML_BSSN::gt12",
- [GTXZ] = "ML_BSSN::gt13",
- [GTYZ] = "ML_BSSN::gt23",
- [ATXX] = "ML_BSSN::At11",
- [ATYY] = "ML_BSSN::At22",
- [ATZZ] = "ML_BSSN::At33",
- [ATXY] = "ML_BSSN::At12",
- [ATXZ] = "ML_BSSN::At13",
- [ATYZ] = "ML_BSSN::At23",
- [PHI] = "ML_BSSN::phi",
- [K] = "ML_BSSN::trK",
- [XTX] = "ML_BSSN::Xt1",
- [XTY] = "ML_BSSN::Xt2",
- [XTZ] = "ML_BSSN::Xt3",
- [BETAX] = "ML_BSSN::beta1",
- [BETAY] = "ML_BSSN::beta2",
- [BETAZ] = "ML_BSSN::beta3",
-};
-
-/* mapping between the cactus grid values and interpolated values */
-static const CCTK_INT interp_operation_indices[] = {
- [I_GTXX] = GTXX,
- [I_GTXX_DX] = GTXX,
- [I_GTXX_DY] = GTXX,
- [I_GTXX_DZ] = GTXX,
- [I_GTYY] = GTYY,
- [I_GTYY_DX] = GTYY,
- [I_GTYY_DY] = GTYY,
- [I_GTYY_DZ] = GTYY,
- [I_GTZZ] = GTZZ,
- [I_GTZZ_DX] = GTZZ,
- [I_GTZZ_DY] = GTZZ,
- [I_GTZZ_DZ] = GTZZ,
- [I_GTXY] = GTXY,
- [I_GTXY_DX] = GTXY,
- [I_GTXY_DY] = GTXY,
- [I_GTXY_DZ] = GTXY,
- [I_GTXZ] = GTXZ,
- [I_GTXZ_DX] = GTXZ,
- [I_GTXZ_DY] = GTXZ,
- [I_GTXZ_DZ] = GTXZ,
- [I_GTYZ] = GTYZ,
- [I_GTYZ_DX] = GTYZ,
- [I_GTYZ_DY] = GTYZ,
- [I_GTYZ_DZ] = GTYZ,
- [I_PHI] = PHI,
- [I_PHI_DX] = PHI,
- [I_PHI_DY] = PHI,
- [I_PHI_DZ] = PHI,
- [I_ATXX] = ATXX,
- [I_ATYY] = ATYY,
- [I_ATZZ] = ATZZ,
- [I_ATXY] = ATXY,
- [I_ATXZ] = ATXZ,
- [I_ATYZ] = ATYZ,
- [I_K] = K,
- [I_K_DX] = K,
- [I_K_DY] = K,
- [I_K_DZ] = K,
- [I_XTX] = XTX,
- [I_XTY] = XTY,
- [I_XTZ] = XTZ,
- [I_BETAX] = BETAX,
- [I_BETAY] = BETAY,
- [I_BETAZ] = BETAZ,
-};
-
-/* the operation (plain value or x/y/z-derivative) to apply during interpolation */
-static const CCTK_INT interp_operation_codes[] = {
- [I_GTXX] = 0,
- [I_GTXX_DX] = 1,
- [I_GTXX_DY] = 2,
- [I_GTXX_DZ] = 3,
- [I_GTYY] = 0,
- [I_GTYY_DX] = 1,
- [I_GTYY_DY] = 2,
- [I_GTYY_DZ] = 3,
- [I_GTZZ] = 0,
- [I_GTZZ_DX] = 1,
- [I_GTZZ_DY] = 2,
- [I_GTZZ_DZ] = 3,
- [I_GTXY] = 0,
- [I_GTXY_DX] = 1,
- [I_GTXY_DY] = 2,
- [I_GTXY_DZ] = 3,
- [I_GTXZ] = 0,
- [I_GTXZ_DX] = 1,
- [I_GTXZ_DY] = 2,
- [I_GTXZ_DZ] = 3,
- [I_GTYZ] = 0,
- [I_GTYZ_DX] = 1,
- [I_GTYZ_DY] = 2,
- [I_GTYZ_DZ] = 3,
- [I_PHI] = 0,
- [I_PHI_DX] = 1,
- [I_PHI_DY] = 2,
- [I_PHI_DZ] = 3,
- [I_ATXX] = 0,
- [I_ATYY] = 0,
- [I_ATZZ] = 0,
- [I_ATXY] = 0,
- [I_ATXZ] = 0,
- [I_ATYZ] = 0,
- [I_K] = 0,
- [I_K_DX] = 1,
- [I_K_DY] = 2,
- [I_K_DZ] = 3,
- [I_XTX] = 0,
- [I_XTY] = 0,
- [I_XTZ] = 0,
- [I_BETAX] = 0,
- [I_BETAY] = 0,
- [I_BETAZ] = 0,
-};
-
-static void init_opencl(MaximalSlicingContext *ms)
+/* get an approximate "main" frequency component in a basis function */
+static double calc_basis_freq(const BasisSet *b, int order)
{
- int err, count;
- cl_platform_id platform;
- cl_device_id device;
- cl_context_properties props[3];
-
- err = clGetPlatformIDs(1, &platform, &count);
- if (err != CL_SUCCESS || count < 1) {
- fprintf(stderr, "Could not get an OpenCL platform ID\n");
- return;
- }
-
- err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &count);
- if (err != CL_SUCCESS || count < 1) {
- fprintf(stderr, "Could not get an OpenCL device ID\n");
- return;
- }
-
- props[0] = CL_CONTEXT_PLATFORM;
- props[1] = (cl_context_properties)platform;
- props[2] = 0;
-
- ms->cl_ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
- if (err != CL_SUCCESS || !ms->cl_ctx) {
- fprintf(stderr, "Could not create an OpenCL context\n");
- return;
- }
-
- ms->cl_queue = clCreateCommandQueue(ms->cl_ctx, device, 0, &err);
- if (err != CL_SUCCESS || !ms->cl_queue) {
- fprintf(stderr, "Could not create an OpenCL command queue: %d\n", err);
- goto fail;
- }
-
- err = clblasSetup();
- if (err != CL_SUCCESS) {
- fprintf(stderr, "Error setting up clBLAS\n");
- goto fail;
- }
-
- ms->ocl_coeffs = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
-
- ms->bicgstab.cl_p = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_v = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_y = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_z = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_t = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_res = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_res0 = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_tmp = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_tmp1 = clCreateBuffer(ms->cl_ctx, 0, 2 * ms->nb_coeffs * sizeof(double), NULL, &err);
-
- ms->bicgstab.cl_k = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err);
- ms->bicgstab.cl_mat = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err);
-
- ms->bicgstab.cl_rho = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
- ms->bicgstab.cl_alpha = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
- ms->bicgstab.cl_beta = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
- ms->bicgstab.cl_omega = clCreateBuffer(ms->cl_ctx, 0, 2 * sizeof(double), NULL, &err);
- ms->bicgstab.cl_omega1 = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err);
-
- return;
-fail:
- if (ms->cl_queue)
- clReleaseCommandQueue(ms->cl_queue);
- ms->cl_queue = 0;
-
- if (ms->cl_ctx)
- clReleaseContext(ms->cl_ctx);
- ms->cl_ctx = 0;
-}
-
-static MaximalSlicingContext *init_ms(cGH *cctkGH,
- int basis_order_r, int basis_order_z,
- double sf,
- CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z,
- const int grid_size[3])
-{
- MaximalSlicingContext *ms;
- int ret;
-
- //const double h = (x[1] - x[0]) / 8;
- //fprintf(stderr, "h %g\n", h);
-
- //if (basis_order_r != basis_order_z)
- // CCTK_WARN(0, "Different r and z basis orders are not supported.");
-
- ms = calloc(1, sizeof(*ms));
-
- ms->gh = cctkGH;
-
- ms->basis = &msa_sb_even_basis;
- //ms->basis = &full_basis;
- //ms->basis = &cheb_basis;
-
- ms->nb_coeffs_x = basis_order_r;
- ms->nb_coeffs_z = basis_order_z;
-
- ms->nb_coeffs = ms->nb_coeffs_x * ms->nb_coeffs_z;
-
- ms->nb_colloc_points_x = basis_order_r;
- ms->nb_colloc_points_z = basis_order_z;
-
- ms->nb_colloc_points = ms->nb_colloc_points_x * ms->nb_colloc_points_z;
-
- if (ms->nb_colloc_points != ms->nb_coeffs)
- CCTK_WARN(0, "Non-square collocation matrix");
-
- ms->colloc_grid_order_x = ms->nb_colloc_points_x;
- ms->colloc_grid_order_z = ms->nb_colloc_points_z;
-
- ms->mat = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points);
- ms->coeffs = malloc(sizeof(double) * ms->nb_coeffs);
- ms->rhs = malloc(sizeof(double) * ms->nb_colloc_points);
-
- ms->mat_f = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points);
- ms->ipiv = malloc(sizeof(*ms->ipiv) * ms->nb_coeffs);
-
-#if 1
- scale_factor = 1.0;
-
- scale_factor = (x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] - 3) / ms->basis->colloc_point(ms->colloc_grid_order_x, ms->nb_colloc_points_x - 1);
- //scale_factor = x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] - 0.5;
- fprintf(stderr, "scale factor %g\n", scale_factor);
-
-#else
- scale_factor = sf;
-#endif
-
- ms->grid_x = malloc(ms->nb_colloc_points_x * sizeof(*ms->grid_x));
-
- for (int i = 0; i < ms->nb_colloc_points_x; i++) {
-#if 0
- double target_val = ms->basis->colloc_point(ms->colloc_grid_order_x, i);
- double best_diff = DBL_MAX, best_val = DBL_MAX;
-
- for (int j = 0; j < grid_size[0]; j++) {
- int idx = CCTK_GFINDEX3D(cctkGH, j, 0, 0);
- double val = x[idx];
- double diff = fabs(target_val - val);
-
- if (val > 0.0 && diff < best_diff) {
- int k;
- for (k = 0; k < i; k++)
- if (ms->grid_x[k] == val)
- break;
- if (k == i) {
- best_diff = diff;
- best_val = val;
- }
- }
- }
- if (best_val == DBL_MAX)
- abort();
- fprintf(stderr, "%d %g -> %g (%g)\n", i, target_val, best_val, target_val - best_val);
- ms->grid_x[i] = best_val;
-#elif 0
- double max = x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)];
- ms->grid_x[i] = max * SQR(SQR((double)i / ms->nb_colloc_points_x)) + 0.001;
-#else
- ms->grid_x[i] = ms->basis->colloc_point(ms->colloc_grid_order_x, i);
-#endif
- fprintf(stderr, "%d %g\n", i, ms->grid_x[i]);
- }
-
- ms->grid_z = malloc(ms->nb_colloc_points_z * sizeof(*ms->grid_z));
-
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- ms->grid_z[i] = ms->basis->colloc_point(ms->colloc_grid_order_z, i);
- }
-
- /* precompute the basis values we will need */
- ms->basis_x_val = malloc(sizeof(*ms->basis_x_val) * ms->nb_colloc_points_x * ms->nb_coeffs_x);
- ms->basis_x_dval = malloc(sizeof(*ms->basis_x_dval) * ms->nb_colloc_points_x * ms->nb_coeffs_x);
- ms->basis_x_d2val = malloc(sizeof(*ms->basis_x_d2val) * ms->nb_colloc_points_x * ms->nb_coeffs_x);
- for (int i = 0; i < ms->nb_colloc_points_x; i++) {
- CCTK_REAL coord = ms->grid_x[i];
- for (int j = 0; j < ms->nb_coeffs_x; j++) {
- ms->basis_x_val [i * ms->nb_coeffs_x + j] = ms->basis->eval(coord, j);
- ms->basis_x_dval [i * ms->nb_coeffs_x + j] = ms->basis->eval_diff1(coord, j);
- ms->basis_x_d2val[i * ms->nb_coeffs_x + j] = ms->basis->eval_diff2(coord, j);
- }
- }
-
- ms->basis_z_val = malloc(sizeof(*ms->basis_z_val) * ms->nb_colloc_points_z * ms->nb_coeffs_z);
- ms->basis_z_dval = malloc(sizeof(*ms->basis_z_dval) * ms->nb_colloc_points_z * ms->nb_coeffs_z);
- ms->basis_z_d2val = malloc(sizeof(*ms->basis_z_d2val) * ms->nb_colloc_points_z * ms->nb_coeffs_z);
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- CCTK_REAL coord = ms->grid_z[i];
- for (int j = 0; j < ms->nb_coeffs_z; j++) {
- ms->basis_z_val [i * ms->nb_coeffs_z + j] = ms->basis->eval(coord, j);
- ms->basis_z_dval [i * ms->nb_coeffs_z + j] = ms->basis->eval_diff1(coord, j);
- ms->basis_z_d2val[i * ms->nb_coeffs_z + j] = ms->basis->eval_diff2(coord, j);
- }
- }
-
- ms->basis_val_00 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00));
- ms->basis_val_11 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00));
- ms->basis_val_10 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00));
- ms->basis_val_01 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00));
- ms->basis_val_02 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00));
- ms->basis_val_20 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00));
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- const double *basis_val_z = ms->basis_z_val + i * ms->nb_coeffs_z;
- const double *dbasis_val_z = ms->basis_z_dval + i * ms->nb_coeffs_z;
- const double *d2basis_val_z = ms->basis_z_d2val + i * ms->nb_coeffs_z;
-
- for (int j = 0; j < ms->nb_colloc_points_x; j++) {
- const double *basis_val_x = ms->basis_x_val + j * ms->nb_coeffs_x;
- const double *dbasis_val_x = ms->basis_x_dval + j * ms->nb_coeffs_x;
- const double *d2basis_val_x = ms->basis_x_d2val + j * ms->nb_coeffs_x;
- const int idx_grid = i * ms->nb_colloc_points_x + j;
-
- for (int k = 0; k < ms->nb_coeffs_z; k++)
- for (int l = 0; l < ms->nb_coeffs_x; l++) {
- const int idx_coeff = k * ms->nb_coeffs_x + l;
- const int idx = idx_grid + ms->nb_colloc_points * idx_coeff;
- ms->basis_val_00[idx] = basis_val_x[l] * basis_val_z[k];
- ms->basis_val_11[idx] = dbasis_val_x[l] * dbasis_val_z[k];
- ms->basis_val_10[idx] = dbasis_val_x[l] * basis_val_z[k];
- ms->basis_val_01[idx] = basis_val_x[l] * dbasis_val_z[k];
- ms->basis_val_02[idx] = basis_val_x[l] * d2basis_val_z[k];
- ms->basis_val_20[idx] = d2basis_val_x[l] * basis_val_z[k];
- }
- }
- }
-
- ms->interp_coords[0] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[0]));
- ms->interp_coords[1] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[1]));
- ms->interp_coords[2] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[2]));
- for (int i = 0; i < ms->nb_colloc_points_z; i++) {
- CCTK_REAL z = ms->grid_z[i];
- for (int j = 0; j < ms->nb_colloc_points_x; j++) {
- CCTK_REAL x = ms->grid_x[j];
-
- ms->interp_coords[0][i * ms->nb_colloc_points_x + j] = x;
- ms->interp_coords[1][i * ms->nb_colloc_points_x + j] = 0;
- ms->interp_coords[2][i * ms->nb_colloc_points_x + j] = z;
- }
- }
-
- for (int i = 0; i < ARRAY_ELEMS(ms->metric_u); i++)
- ms->metric_u[i] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_values[i]));
-
- ms->kij_kij = malloc(ms->nb_colloc_points * sizeof(*ms->kij_kij));
-
- for (int i = 0; i < ARRAY_ELEMS(ms->interp_values); i++) {
- ms->interp_values[i] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_values[i]));
- ms->interp_value_codes[i] = CCTK_VARIABLE_REAL;
- }
-
- for (int i = 0; i < ARRAY_ELEMS(metric_vars); i++)
- ms->interp_vars_indices[i] = CCTK_VarIndex(metric_vars[i]);
-
- ms->coord_system = CCTK_CoordSystemHandle("cart3d");
- if (ms->coord_system < 0)
- CCTK_WARN(0, "Error getting the coordinate system");
-
- ms->interp_operator = CCTK_InterpHandle("Lagrange polynomial interpolation");
- if (ms->interp_operator < 0)
- CCTK_WARN(0, "Error getting the interpolation operator");
-
- ms->interp_params = Util_TableCreateFromString("order=2 want_global_mode=1");
- if (ms->interp_params < 0)
- CCTK_WARN(0, "Error creating interpolation parameters table");
-
- ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS,
- interp_operation_codes, "operation_codes");
- if (ret < 0)
- CCTK_WARN(0, "Error setting operation codes");
-
- ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS,
- interp_operation_indices, "operand_indices");
- if (ret < 0)
- CCTK_WARN(0, "Error setting operand indices");
-
- ms->bicgstab.p = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.v = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.y = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.z = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.t = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.res = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.res0 = malloc(sizeof(double) * ms->nb_coeffs);
- ms->bicgstab.k = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points);
-
- ms->steps_since_inverse = INT_MAX;
-
- init_opencl(ms);
-
- CCTK_TimerCreate("MaximalSlicingAxi_Solve");
- CCTK_TimerCreate("MaximalSlicingAxi_Expand");
- CCTK_TimerCreate("MaximalSlicingAxi_calc_geometry");
- CCTK_TimerCreate("MaximalSlicingAxi_construct_matrix");
- CCTK_TimerCreate("MaximalSlicingAxi_solve_LU");
- CCTK_TimerCreate("MaximalSlicingAxi_solve_BiCGSTAB");
-
- return ms;
+ return b->colloc_point(order, 1);
}
static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
- CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z,
- CCTK_REAL *alp)
+ CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z)
{
cGH *cctkGH = ms->gh;
+ int nb_coeffs_x = ms->solver->nb_coeffs[0];
+ int nb_coeffs_z = ms->solver->nb_coeffs[1];
CoordPatch *cp;
int64_t grid_size;
+ int i;
for (int i = 0; i < ms->nb_patches; i++) {
cp = &ms->patches[i];
@@ -471,9 +51,9 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
cp->size[0] == ms->gh->cctk_lsh[0] &&
cp->size[1] == ms->gh->cctk_lsh[1] &&
cp->size[2] == ms->gh->cctk_lsh[2] &&
- cp->delta[0] == ms->gh->cctk_delta_space[0] &&
- cp->delta[1] == ms->gh->cctk_delta_space[1] &&
- cp->delta[2] == ms->gh->cctk_delta_space[2])
+ cp->delta[0] == ms->gh->cctk_levfac[0] &&
+ cp->delta[1] == ms->gh->cctk_levfac[1] &&
+ cp->delta[2] == ms->gh->cctk_levfac[2])
return cp;
}
@@ -483,10 +63,21 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
ms->patches = realloc(ms->patches, sizeof(*ms->patches) * (ms->nb_patches + 1));
cp = &ms->patches[ms->nb_patches];
+ memset(cp, 0, sizeof(*cp));
+
memcpy(cp->origin, ms->gh->cctk_origin_space, sizeof(cp->origin));
memcpy(cp->size, ms->gh->cctk_lsh, sizeof(cp->size));
- memcpy(cp->delta, ms->gh->cctk_delta_space, sizeof(cp->delta));
+ memcpy(cp->delta, ms->gh->cctk_levfac, sizeof(cp->delta));
+
+ for (i = 0; i < cp->size[1]; i++)
+ if (fabs(y[CCTK_GFINDEX3D(cctkGH, 0, i, 0)]) < 1e-8) {
+ cp->y_idx = i;
+ break;
+ }
+ if (i == cp->size[1])
+ CCTK_WARN(0, "The grid does not include y==0");
+#if 0
posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]);
for (int j = 0; j < ms->gh->cctk_lsh[1]; j++)
for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) {
@@ -500,23 +91,91 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms,
}
posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]);
-
for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) {
CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)];
for (int j = 0; j < ms->nb_coeffs_z; j++)
- cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(zz, j);
+ cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j);
//cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j);
}
-
posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->nb_coeffs_x);
posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size);
for (int i = 0; i < grid_size; i++)
cp->one[i] = 1.0;
+#else
+ posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * nb_coeffs_x * cp->size[0] * cp->size[2]);
+ posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * nb_coeffs_z * cp->size[0] * cp->size[2]);
+#pragma omp parallel for
+ for (int j = 0; j < cp->size[2]; j++) {
+ CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)];
+
+ for (int i = 0; i < cp->size[0]; i++) {
+ const int idx_grid = j * cp->size[0] + i;
+
+ double xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)];
+ double rr = sqrt(SQR(xx) + SQR(zz));
+
+#if MS_POLAR
+ double coord0 = rr;
+ double coord1 = atan2(zz, xx);
+#else
+ double coord0 = xx;
+ double coord1 = zz;
+#endif
+
+ //for (int k = 0; k < ms->nb_coeffs_z; k++)
+ // for (int l = 0; l < ms->nb_coeffs_x; l++) {
+ // const int idx_coeff = k * ms->nb_coeffs_x + l;
+ // cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * idx_coeff] = ms->basis->eval(r, l) * ms->basis1->eval(phi, k);
+ // }
+ for (int k = 0; k < nb_coeffs_x; k++) {
+ double dx = calc_basis_freq(ms->solver->basis[0], k);
+ double r0 = dx * 64;
+ double fact = exp(-36.0 * pow(rr / r0, 10));
+ fact = 1.0;
+
+ cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->solver->basis[0]->eval(coord0, k) * fact;
+ }
+ for (int k = 0; k < nb_coeffs_z; k++) {
+ double dx = calc_basis_freq(ms->solver->basis[1], k);
+ double r0 = dx * 64;
+ double fact = exp(-36.0 * pow(rr / r0, 10));
+ fact = 1.0;
+
+ cp->transform_matrix1[idx_grid * nb_coeffs_z + k] = ms->solver->basis[1]->eval(coord1, k) * fact;
+ }
+ }
+ }
+ posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * nb_coeffs_z);
+#endif
ms->nb_patches++;
return cp;
}
+static int context_init(cGH *cctkGH, MaximalSlicingContext **ctx)
+{
+ MaximalSlicingContext *ms;
+ int ret;
+
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ ms = calloc(1, sizeof(*ms));
+ if (!ms)
+ return -ENOMEM;
+
+ ms->gh = cctkGH;
+
+ ret = ms_solver_init(&ms->solver, cctkGH, basis_order_r, basis_order_z,
+ scale_factor, filter_power, 0.0);
+ if (ret < 0)
+ return ret;
+
+ *ctx = ms;
+
+ return 0;
+}
+
void maximal_slicing_axi(CCTK_ARGUMENTS)
{
static MaximalSlicingContext *ms;
@@ -526,45 +185,97 @@ void maximal_slicing_axi(CCTK_ARGUMENTS)
DECLARE_CCTK_ARGUMENTS;
DECLARE_CCTK_PARAMETERS;
- static double total;
- static int64_t count;
- int64_t timer_start;
+ int64_t expand_start, totaltime_start;
+
+ double *coeffs = NULL;
+ int i, ret;
- const int64_t grid_size = cctk_lsh[2] * cctk_lsh[1] * cctk_lsh[0];
+ totaltime_start = gettime();
/* on the first run, init the solver */
- if (!ms) {
- ms = init_ms(cctkGH, basis_order_r, basis_order_z,
- scale_factor, x, y, z, cctk_lsh);
- }
+ if (!ms)
+ context_init(cctkGH, &ms);
- cp = get_coord_patch(ms, x, y, z, alp);
+ cp = get_coord_patch(ms, x, y, z);
CCTK_TimerStart("MaximalSlicingAxi_Solve");
- msa_maximal_solve(ms);
+ ms_solver_solve(ms->solver);
CCTK_TimerStop("MaximalSlicingAxi_Solve");
+ coeffs = ms->solver->coeffs;
+
if (export_coeffs)
- memcpy(alpha_coeffs, ms->coeffs, sizeof(*alpha_coeffs) * ms->nb_coeffs);
+ memcpy(alpha_coeffs, coeffs, sizeof(*alpha_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]);
CCTK_TimerStart("MaximalSlicingAxi_Expand");
- timer_start = gettime();
+ expand_start = gettime();
+#if 0
+#pragma omp parallel for
+ for (int k = 0; k < cctk_lsh[2]; k++) {
+ for (int i = 0; i < cctk_lsh[0]; i++) {
+ int idx = CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, k);
+ double xx = x[idx];
+ double zz = z[idx];
+ double r = sqrt(SQR(xx) + SQR(zz));
+ double phi = atan2(zz, xx);
+
+ double val = 1.0;
+
+ for (int l = 0; l < ms->nb_coeffs_z; l++) {
+ double tmp = 0.0;
+ for (int m = 0; m < ms->nb_coeffs_x; m++) {
+ const int idx_coeff = l * ms->nb_coeffs_x + m;
+ tmp += ms->coeffs[idx_coeff] * ms->basis->eval(r, m);
+ }
+ val += tmp * ms->basis1->eval(phi, l);
+ }
- memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp));
- cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
- ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0,
- ms->coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z,
- 0.0, cp->transform_z, ms->nb_coeffs_x);
+ alp[idx] = val;
+ }
+ }
+#else
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
- cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0,
- cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x,
- 1.0, alp, cctk_lsh[0] * cctk_lsh[1]);
-
- total += gettime() - timer_start;
+ cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0],
+ 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2],
+ coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]);
+#pragma omp parallel for
+ for (int j = 0; j < cctk_lsh[2]; j++)
+ for (int i = 0; i < cctk_lsh[0]; i++) {
+ const int idx_grid = j * cctk_lsh[0] + i;
+ const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1,
+ cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]);
+ alpha[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = 1.0 + val;
+ }
+#endif
+ //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp));
+ //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp));
+ //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ // ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0,
+ // coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z,
+ // 0.0, cp->transform_z, ms->nb_coeffs_x);
+ //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
+ // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0,
+ // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x,
+ // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]);
+
+ ms->grid_expand_time += gettime() - expand_start;
+ ms->grid_expand_count++;
CCTK_TimerStop("MaximalSlicingAxi_Expand");
- count++;
- if (!(count & 15))
- fprintf(stderr, "avg %g total %g\n", total / count, total);
+ //CCTK_TimerStart("MaximalSlicingAxi_Polish");
+ //msa_sor(ms, cp);
+ //CCTK_TimerStop("MaximalSlicingAxi_Polish");
+
+ /* print stats */
+ if (!(ms->grid_expand_count & 255)) {
+ fprintf(stderr, "Maximal slicing stats:\n");
+
+ ms_solver_print_stats(ms->solver);
+ fprintf(stderr,
+ "%lu evals: total time %g s, avg time per call %g ms\n",
+ ms->grid_expand_count, (double)ms->grid_expand_time / 1e6,
+ (double)ms->grid_expand_time / ms->grid_expand_count / 1e3);
+
+ }
}