diff options
Diffstat (limited to 'src/maximal_slicing_axi.c')
-rw-r--r-- | src/maximal_slicing_axi.c | 631 |
1 files changed, 171 insertions, 460 deletions
diff --git a/src/maximal_slicing_axi.c b/src/maximal_slicing_axi.c index 48bb4c9..e5b0d85 100644 --- a/src/maximal_slicing_axi.c +++ b/src/maximal_slicing_axi.c @@ -21,446 +21,26 @@ #include "util_Table.h" #include "maximal_slicing_axi.h" - -#define ACC_TEST 0 - -/* - * The basis of even (n = 2 * idx) SB functions (Boyd 2000, Ch 17.9) - * SB(x, n) = sin((n + 1) arccot(|x| / L)) - * They are symmetric wrt origin and decay as 1/x in infinity. - */ +#include "ms_solve.h" double scale_factor; - -/* mapping between our indices and thorn names */ -static const char *metric_vars[] = { - [GTXX] = "ML_BSSN::gt11", - [GTYY] = "ML_BSSN::gt22", - [GTZZ] = "ML_BSSN::gt33", - [GTXY] = "ML_BSSN::gt12", - [GTXZ] = "ML_BSSN::gt13", - [GTYZ] = "ML_BSSN::gt23", - [ATXX] = "ML_BSSN::At11", - [ATYY] = "ML_BSSN::At22", - [ATZZ] = "ML_BSSN::At33", - [ATXY] = "ML_BSSN::At12", - [ATXZ] = "ML_BSSN::At13", - [ATYZ] = "ML_BSSN::At23", - [PHI] = "ML_BSSN::phi", - [K] = "ML_BSSN::trK", - [XTX] = "ML_BSSN::Xt1", - [XTY] = "ML_BSSN::Xt2", - [XTZ] = "ML_BSSN::Xt3", - [BETAX] = "ML_BSSN::beta1", - [BETAY] = "ML_BSSN::beta2", - [BETAZ] = "ML_BSSN::beta3", -}; - -/* mapping between the cactus grid values and interpolated values */ -static const CCTK_INT interp_operation_indices[] = { - [I_GTXX] = GTXX, - [I_GTXX_DX] = GTXX, - [I_GTXX_DY] = GTXX, - [I_GTXX_DZ] = GTXX, - [I_GTYY] = GTYY, - [I_GTYY_DX] = GTYY, - [I_GTYY_DY] = GTYY, - [I_GTYY_DZ] = GTYY, - [I_GTZZ] = GTZZ, - [I_GTZZ_DX] = GTZZ, - [I_GTZZ_DY] = GTZZ, - [I_GTZZ_DZ] = GTZZ, - [I_GTXY] = GTXY, - [I_GTXY_DX] = GTXY, - [I_GTXY_DY] = GTXY, - [I_GTXY_DZ] = GTXY, - [I_GTXZ] = GTXZ, - [I_GTXZ_DX] = GTXZ, - [I_GTXZ_DY] = GTXZ, - [I_GTXZ_DZ] = GTXZ, - [I_GTYZ] = GTYZ, - [I_GTYZ_DX] = GTYZ, - [I_GTYZ_DY] = GTYZ, - [I_GTYZ_DZ] = GTYZ, - [I_PHI] = PHI, - [I_PHI_DX] = PHI, - [I_PHI_DY] = PHI, - [I_PHI_DZ] = PHI, - [I_ATXX] = ATXX, - [I_ATYY] = ATYY, - [I_ATZZ] = ATZZ, - [I_ATXY] = ATXY, - [I_ATXZ] = ATXZ, - [I_ATYZ] = ATYZ, - [I_K] = K, - [I_K_DX] = K, - [I_K_DY] = K, - [I_K_DZ] = K, - [I_XTX] = XTX, - [I_XTY] = XTY, - [I_XTZ] = XTZ, - [I_BETAX] = BETAX, - [I_BETAY] = BETAY, - [I_BETAZ] = BETAZ, -}; - -/* the operation (plain value or x/y/z-derivative) to apply during interpolation */ -static const CCTK_INT interp_operation_codes[] = { - [I_GTXX] = 0, - [I_GTXX_DX] = 1, - [I_GTXX_DY] = 2, - [I_GTXX_DZ] = 3, - [I_GTYY] = 0, - [I_GTYY_DX] = 1, - [I_GTYY_DY] = 2, - [I_GTYY_DZ] = 3, - [I_GTZZ] = 0, - [I_GTZZ_DX] = 1, - [I_GTZZ_DY] = 2, - [I_GTZZ_DZ] = 3, - [I_GTXY] = 0, - [I_GTXY_DX] = 1, - [I_GTXY_DY] = 2, - [I_GTXY_DZ] = 3, - [I_GTXZ] = 0, - [I_GTXZ_DX] = 1, - [I_GTXZ_DY] = 2, - [I_GTXZ_DZ] = 3, - [I_GTYZ] = 0, - [I_GTYZ_DX] = 1, - [I_GTYZ_DY] = 2, - [I_GTYZ_DZ] = 3, - [I_PHI] = 0, - [I_PHI_DX] = 1, - [I_PHI_DY] = 2, - [I_PHI_DZ] = 3, - [I_ATXX] = 0, - [I_ATYY] = 0, - [I_ATZZ] = 0, - [I_ATXY] = 0, - [I_ATXZ] = 0, - [I_ATYZ] = 0, - [I_K] = 0, - [I_K_DX] = 1, - [I_K_DY] = 2, - [I_K_DZ] = 3, - [I_XTX] = 0, - [I_XTY] = 0, - [I_XTZ] = 0, - [I_BETAX] = 0, - [I_BETAY] = 0, - [I_BETAZ] = 0, -}; - -static void init_opencl(MaximalSlicingContext *ms) +/* get an approximate "main" frequency component in a basis function */ +static double calc_basis_freq(const BasisSet *b, int order) { - int err, count; - cl_platform_id platform; - cl_device_id device; - cl_context_properties props[3]; - - err = clGetPlatformIDs(1, &platform, &count); - if (err != CL_SUCCESS || count < 1) { - fprintf(stderr, "Could not get an OpenCL platform ID\n"); - return; - } - - err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &count); - if (err != CL_SUCCESS || count < 1) { - fprintf(stderr, "Could not get an OpenCL device ID\n"); - return; - } - - props[0] = CL_CONTEXT_PLATFORM; - props[1] = (cl_context_properties)platform; - props[2] = 0; - - ms->cl_ctx = clCreateContext(props, 1, &device, NULL, NULL, &err); - if (err != CL_SUCCESS || !ms->cl_ctx) { - fprintf(stderr, "Could not create an OpenCL context\n"); - return; - } - - ms->cl_queue = clCreateCommandQueue(ms->cl_ctx, device, 0, &err); - if (err != CL_SUCCESS || !ms->cl_queue) { - fprintf(stderr, "Could not create an OpenCL command queue: %d\n", err); - goto fail; - } - - err = clblasSetup(); - if (err != CL_SUCCESS) { - fprintf(stderr, "Error setting up clBLAS\n"); - goto fail; - } - - ms->ocl_coeffs = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - - ms->bicgstab.cl_p = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_v = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_y = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_z = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_t = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_res = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_res0 = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_tmp = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_tmp1 = clCreateBuffer(ms->cl_ctx, 0, 2 * ms->nb_coeffs * sizeof(double), NULL, &err); - - ms->bicgstab.cl_k = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_mat = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err); - - ms->bicgstab.cl_rho = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - ms->bicgstab.cl_alpha = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - ms->bicgstab.cl_beta = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - ms->bicgstab.cl_omega = clCreateBuffer(ms->cl_ctx, 0, 2 * sizeof(double), NULL, &err); - ms->bicgstab.cl_omega1 = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - - return; -fail: - if (ms->cl_queue) - clReleaseCommandQueue(ms->cl_queue); - ms->cl_queue = 0; - - if (ms->cl_ctx) - clReleaseContext(ms->cl_ctx); - ms->cl_ctx = 0; -} - -static MaximalSlicingContext *init_ms(cGH *cctkGH, - int basis_order_r, int basis_order_z, - double sf, - CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z, - const int grid_size[3]) -{ - MaximalSlicingContext *ms; - int ret; - - //const double h = (x[1] - x[0]) / 8; - //fprintf(stderr, "h %g\n", h); - - //if (basis_order_r != basis_order_z) - // CCTK_WARN(0, "Different r and z basis orders are not supported."); - - ms = calloc(1, sizeof(*ms)); - - ms->gh = cctkGH; - - ms->basis = &msa_sb_even_basis; - //ms->basis = &full_basis; - //ms->basis = &cheb_basis; - - ms->nb_coeffs_x = basis_order_r; - ms->nb_coeffs_z = basis_order_z; - - ms->nb_coeffs = ms->nb_coeffs_x * ms->nb_coeffs_z; - - ms->nb_colloc_points_x = basis_order_r; - ms->nb_colloc_points_z = basis_order_z; - - ms->nb_colloc_points = ms->nb_colloc_points_x * ms->nb_colloc_points_z; - - if (ms->nb_colloc_points != ms->nb_coeffs) - CCTK_WARN(0, "Non-square collocation matrix"); - - ms->colloc_grid_order_x = ms->nb_colloc_points_x; - ms->colloc_grid_order_z = ms->nb_colloc_points_z; - - ms->mat = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points); - ms->coeffs = malloc(sizeof(double) * ms->nb_coeffs); - ms->rhs = malloc(sizeof(double) * ms->nb_colloc_points); - - ms->mat_f = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points); - ms->ipiv = malloc(sizeof(*ms->ipiv) * ms->nb_coeffs); - -#if 1 - scale_factor = 1.0; - - scale_factor = (x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] - 3) / ms->basis->colloc_point(ms->colloc_grid_order_x, ms->nb_colloc_points_x - 1); - //scale_factor = x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] - 0.5; - fprintf(stderr, "scale factor %g\n", scale_factor); - -#else - scale_factor = sf; -#endif - - ms->grid_x = malloc(ms->nb_colloc_points_x * sizeof(*ms->grid_x)); - - for (int i = 0; i < ms->nb_colloc_points_x; i++) { -#if 0 - double target_val = ms->basis->colloc_point(ms->colloc_grid_order_x, i); - double best_diff = DBL_MAX, best_val = DBL_MAX; - - for (int j = 0; j < grid_size[0]; j++) { - int idx = CCTK_GFINDEX3D(cctkGH, j, 0, 0); - double val = x[idx]; - double diff = fabs(target_val - val); - - if (val > 0.0 && diff < best_diff) { - int k; - for (k = 0; k < i; k++) - if (ms->grid_x[k] == val) - break; - if (k == i) { - best_diff = diff; - best_val = val; - } - } - } - if (best_val == DBL_MAX) - abort(); - fprintf(stderr, "%d %g -> %g (%g)\n", i, target_val, best_val, target_val - best_val); - ms->grid_x[i] = best_val; -#elif 0 - double max = x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)]; - ms->grid_x[i] = max * SQR(SQR((double)i / ms->nb_colloc_points_x)) + 0.001; -#else - ms->grid_x[i] = ms->basis->colloc_point(ms->colloc_grid_order_x, i); -#endif - fprintf(stderr, "%d %g\n", i, ms->grid_x[i]); - } - - ms->grid_z = malloc(ms->nb_colloc_points_z * sizeof(*ms->grid_z)); - - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - ms->grid_z[i] = ms->basis->colloc_point(ms->colloc_grid_order_z, i); - } - - /* precompute the basis values we will need */ - ms->basis_x_val = malloc(sizeof(*ms->basis_x_val) * ms->nb_colloc_points_x * ms->nb_coeffs_x); - ms->basis_x_dval = malloc(sizeof(*ms->basis_x_dval) * ms->nb_colloc_points_x * ms->nb_coeffs_x); - ms->basis_x_d2val = malloc(sizeof(*ms->basis_x_d2val) * ms->nb_colloc_points_x * ms->nb_coeffs_x); - for (int i = 0; i < ms->nb_colloc_points_x; i++) { - CCTK_REAL coord = ms->grid_x[i]; - for (int j = 0; j < ms->nb_coeffs_x; j++) { - ms->basis_x_val [i * ms->nb_coeffs_x + j] = ms->basis->eval(coord, j); - ms->basis_x_dval [i * ms->nb_coeffs_x + j] = ms->basis->eval_diff1(coord, j); - ms->basis_x_d2val[i * ms->nb_coeffs_x + j] = ms->basis->eval_diff2(coord, j); - } - } - - ms->basis_z_val = malloc(sizeof(*ms->basis_z_val) * ms->nb_colloc_points_z * ms->nb_coeffs_z); - ms->basis_z_dval = malloc(sizeof(*ms->basis_z_dval) * ms->nb_colloc_points_z * ms->nb_coeffs_z); - ms->basis_z_d2val = malloc(sizeof(*ms->basis_z_d2val) * ms->nb_colloc_points_z * ms->nb_coeffs_z); - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - CCTK_REAL coord = ms->grid_z[i]; - for (int j = 0; j < ms->nb_coeffs_z; j++) { - ms->basis_z_val [i * ms->nb_coeffs_z + j] = ms->basis->eval(coord, j); - ms->basis_z_dval [i * ms->nb_coeffs_z + j] = ms->basis->eval_diff1(coord, j); - ms->basis_z_d2val[i * ms->nb_coeffs_z + j] = ms->basis->eval_diff2(coord, j); - } - } - - ms->basis_val_00 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00)); - ms->basis_val_11 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00)); - ms->basis_val_10 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00)); - ms->basis_val_01 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00)); - ms->basis_val_02 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00)); - ms->basis_val_20 = calloc(ms->nb_colloc_points * ms->nb_coeffs, sizeof(*ms->basis_val_00)); - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - const double *basis_val_z = ms->basis_z_val + i * ms->nb_coeffs_z; - const double *dbasis_val_z = ms->basis_z_dval + i * ms->nb_coeffs_z; - const double *d2basis_val_z = ms->basis_z_d2val + i * ms->nb_coeffs_z; - - for (int j = 0; j < ms->nb_colloc_points_x; j++) { - const double *basis_val_x = ms->basis_x_val + j * ms->nb_coeffs_x; - const double *dbasis_val_x = ms->basis_x_dval + j * ms->nb_coeffs_x; - const double *d2basis_val_x = ms->basis_x_d2val + j * ms->nb_coeffs_x; - const int idx_grid = i * ms->nb_colloc_points_x + j; - - for (int k = 0; k < ms->nb_coeffs_z; k++) - for (int l = 0; l < ms->nb_coeffs_x; l++) { - const int idx_coeff = k * ms->nb_coeffs_x + l; - const int idx = idx_grid + ms->nb_colloc_points * idx_coeff; - ms->basis_val_00[idx] = basis_val_x[l] * basis_val_z[k]; - ms->basis_val_11[idx] = dbasis_val_x[l] * dbasis_val_z[k]; - ms->basis_val_10[idx] = dbasis_val_x[l] * basis_val_z[k]; - ms->basis_val_01[idx] = basis_val_x[l] * dbasis_val_z[k]; - ms->basis_val_02[idx] = basis_val_x[l] * d2basis_val_z[k]; - ms->basis_val_20[idx] = d2basis_val_x[l] * basis_val_z[k]; - } - } - } - - ms->interp_coords[0] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[0])); - ms->interp_coords[1] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[1])); - ms->interp_coords[2] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[2])); - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - CCTK_REAL z = ms->grid_z[i]; - for (int j = 0; j < ms->nb_colloc_points_x; j++) { - CCTK_REAL x = ms->grid_x[j]; - - ms->interp_coords[0][i * ms->nb_colloc_points_x + j] = x; - ms->interp_coords[1][i * ms->nb_colloc_points_x + j] = 0; - ms->interp_coords[2][i * ms->nb_colloc_points_x + j] = z; - } - } - - for (int i = 0; i < ARRAY_ELEMS(ms->metric_u); i++) - ms->metric_u[i] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_values[i])); - - ms->kij_kij = malloc(ms->nb_colloc_points * sizeof(*ms->kij_kij)); - - for (int i = 0; i < ARRAY_ELEMS(ms->interp_values); i++) { - ms->interp_values[i] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_values[i])); - ms->interp_value_codes[i] = CCTK_VARIABLE_REAL; - } - - for (int i = 0; i < ARRAY_ELEMS(metric_vars); i++) - ms->interp_vars_indices[i] = CCTK_VarIndex(metric_vars[i]); - - ms->coord_system = CCTK_CoordSystemHandle("cart3d"); - if (ms->coord_system < 0) - CCTK_WARN(0, "Error getting the coordinate system"); - - ms->interp_operator = CCTK_InterpHandle("Lagrange polynomial interpolation"); - if (ms->interp_operator < 0) - CCTK_WARN(0, "Error getting the interpolation operator"); - - ms->interp_params = Util_TableCreateFromString("order=2 want_global_mode=1"); - if (ms->interp_params < 0) - CCTK_WARN(0, "Error creating interpolation parameters table"); - - ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS, - interp_operation_codes, "operation_codes"); - if (ret < 0) - CCTK_WARN(0, "Error setting operation codes"); - - ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS, - interp_operation_indices, "operand_indices"); - if (ret < 0) - CCTK_WARN(0, "Error setting operand indices"); - - ms->bicgstab.p = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.v = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.y = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.z = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.t = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.res = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.res0 = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.k = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points); - - ms->steps_since_inverse = INT_MAX; - - init_opencl(ms); - - CCTK_TimerCreate("MaximalSlicingAxi_Solve"); - CCTK_TimerCreate("MaximalSlicingAxi_Expand"); - CCTK_TimerCreate("MaximalSlicingAxi_calc_geometry"); - CCTK_TimerCreate("MaximalSlicingAxi_construct_matrix"); - CCTK_TimerCreate("MaximalSlicingAxi_solve_LU"); - CCTK_TimerCreate("MaximalSlicingAxi_solve_BiCGSTAB"); - - return ms; + return b->colloc_point(order, 1); } static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, - CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z, - CCTK_REAL *alp) + CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z) { cGH *cctkGH = ms->gh; + int nb_coeffs_x = ms->solver->nb_coeffs[0]; + int nb_coeffs_z = ms->solver->nb_coeffs[1]; CoordPatch *cp; int64_t grid_size; + int i; for (int i = 0; i < ms->nb_patches; i++) { cp = &ms->patches[i]; @@ -471,9 +51,9 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, cp->size[0] == ms->gh->cctk_lsh[0] && cp->size[1] == ms->gh->cctk_lsh[1] && cp->size[2] == ms->gh->cctk_lsh[2] && - cp->delta[0] == ms->gh->cctk_delta_space[0] && - cp->delta[1] == ms->gh->cctk_delta_space[1] && - cp->delta[2] == ms->gh->cctk_delta_space[2]) + cp->delta[0] == ms->gh->cctk_levfac[0] && + cp->delta[1] == ms->gh->cctk_levfac[1] && + cp->delta[2] == ms->gh->cctk_levfac[2]) return cp; } @@ -483,10 +63,21 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, ms->patches = realloc(ms->patches, sizeof(*ms->patches) * (ms->nb_patches + 1)); cp = &ms->patches[ms->nb_patches]; + memset(cp, 0, sizeof(*cp)); + memcpy(cp->origin, ms->gh->cctk_origin_space, sizeof(cp->origin)); memcpy(cp->size, ms->gh->cctk_lsh, sizeof(cp->size)); - memcpy(cp->delta, ms->gh->cctk_delta_space, sizeof(cp->delta)); + memcpy(cp->delta, ms->gh->cctk_levfac, sizeof(cp->delta)); + + for (i = 0; i < cp->size[1]; i++) + if (fabs(y[CCTK_GFINDEX3D(cctkGH, 0, i, 0)]) < 1e-8) { + cp->y_idx = i; + break; + } + if (i == cp->size[1]) + CCTK_WARN(0, "The grid does not include y==0"); +#if 0 posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]); for (int j = 0; j < ms->gh->cctk_lsh[1]; j++) for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { @@ -500,23 +91,91 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, } posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]); - for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)]; for (int j = 0; j < ms->nb_coeffs_z; j++) - cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(zz, j); + cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j); //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j); } - posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->nb_coeffs_x); posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size); for (int i = 0; i < grid_size; i++) cp->one[i] = 1.0; +#else + posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * nb_coeffs_x * cp->size[0] * cp->size[2]); + posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * nb_coeffs_z * cp->size[0] * cp->size[2]); +#pragma omp parallel for + for (int j = 0; j < cp->size[2]; j++) { + CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)]; + + for (int i = 0; i < cp->size[0]; i++) { + const int idx_grid = j * cp->size[0] + i; + + double xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; + double rr = sqrt(SQR(xx) + SQR(zz)); + +#if MS_POLAR + double coord0 = rr; + double coord1 = atan2(zz, xx); +#else + double coord0 = xx; + double coord1 = zz; +#endif + + //for (int k = 0; k < ms->nb_coeffs_z; k++) + // for (int l = 0; l < ms->nb_coeffs_x; l++) { + // const int idx_coeff = k * ms->nb_coeffs_x + l; + // cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * idx_coeff] = ms->basis->eval(r, l) * ms->basis1->eval(phi, k); + // } + for (int k = 0; k < nb_coeffs_x; k++) { + double dx = calc_basis_freq(ms->solver->basis[0], k); + double r0 = dx * 64; + double fact = exp(-36.0 * pow(rr / r0, 10)); + fact = 1.0; + + cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->solver->basis[0]->eval(coord0, k) * fact; + } + for (int k = 0; k < nb_coeffs_z; k++) { + double dx = calc_basis_freq(ms->solver->basis[1], k); + double r0 = dx * 64; + double fact = exp(-36.0 * pow(rr / r0, 10)); + fact = 1.0; + + cp->transform_matrix1[idx_grid * nb_coeffs_z + k] = ms->solver->basis[1]->eval(coord1, k) * fact; + } + } + } + posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * nb_coeffs_z); +#endif ms->nb_patches++; return cp; } +static int context_init(cGH *cctkGH, MaximalSlicingContext **ctx) +{ + MaximalSlicingContext *ms; + int ret; + + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + ms = calloc(1, sizeof(*ms)); + if (!ms) + return -ENOMEM; + + ms->gh = cctkGH; + + ret = ms_solver_init(&ms->solver, cctkGH, basis_order_r, basis_order_z, + scale_factor, filter_power, 0.0); + if (ret < 0) + return ret; + + *ctx = ms; + + return 0; +} + void maximal_slicing_axi(CCTK_ARGUMENTS) { static MaximalSlicingContext *ms; @@ -526,45 +185,97 @@ void maximal_slicing_axi(CCTK_ARGUMENTS) DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; - static double total; - static int64_t count; - int64_t timer_start; + int64_t expand_start, totaltime_start; + + double *coeffs = NULL; + int i, ret; - const int64_t grid_size = cctk_lsh[2] * cctk_lsh[1] * cctk_lsh[0]; + totaltime_start = gettime(); /* on the first run, init the solver */ - if (!ms) { - ms = init_ms(cctkGH, basis_order_r, basis_order_z, - scale_factor, x, y, z, cctk_lsh); - } + if (!ms) + context_init(cctkGH, &ms); - cp = get_coord_patch(ms, x, y, z, alp); + cp = get_coord_patch(ms, x, y, z); CCTK_TimerStart("MaximalSlicingAxi_Solve"); - msa_maximal_solve(ms); + ms_solver_solve(ms->solver); CCTK_TimerStop("MaximalSlicingAxi_Solve"); + coeffs = ms->solver->coeffs; + if (export_coeffs) - memcpy(alpha_coeffs, ms->coeffs, sizeof(*alpha_coeffs) * ms->nb_coeffs); + memcpy(alpha_coeffs, coeffs, sizeof(*alpha_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]); CCTK_TimerStart("MaximalSlicingAxi_Expand"); - timer_start = gettime(); + expand_start = gettime(); +#if 0 +#pragma omp parallel for + for (int k = 0; k < cctk_lsh[2]; k++) { + for (int i = 0; i < cctk_lsh[0]; i++) { + int idx = CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, k); + double xx = x[idx]; + double zz = z[idx]; + double r = sqrt(SQR(xx) + SQR(zz)); + double phi = atan2(zz, xx); + + double val = 1.0; + + for (int l = 0; l < ms->nb_coeffs_z; l++) { + double tmp = 0.0; + for (int m = 0; m < ms->nb_coeffs_x; m++) { + const int idx_coeff = l * ms->nb_coeffs_x + m; + tmp += ms->coeffs[idx_coeff] * ms->basis->eval(r, m); + } + val += tmp * ms->basis1->eval(phi, l); + } - memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); - cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0, - ms->coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z, - 0.0, cp->transform_z, ms->nb_coeffs_x); + alp[idx] = val; + } + } +#else cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0, - cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x, - 1.0, alp, cctk_lsh[0] * cctk_lsh[1]); - - total += gettime() - timer_start; + cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], + 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2], + coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]); +#pragma omp parallel for + for (int j = 0; j < cctk_lsh[2]; j++) + for (int i = 0; i < cctk_lsh[0]; i++) { + const int idx_grid = j * cctk_lsh[0] + i; + const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1, + cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]); + alpha[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = 1.0 + val; + } +#endif + //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); + //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); + //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + // ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0, + // coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z, + // 0.0, cp->transform_z, ms->nb_coeffs_x); + //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0, + // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x, + // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]); + + ms->grid_expand_time += gettime() - expand_start; + ms->grid_expand_count++; CCTK_TimerStop("MaximalSlicingAxi_Expand"); - count++; - if (!(count & 15)) - fprintf(stderr, "avg %g total %g\n", total / count, total); + //CCTK_TimerStart("MaximalSlicingAxi_Polish"); + //msa_sor(ms, cp); + //CCTK_TimerStop("MaximalSlicingAxi_Polish"); + + /* print stats */ + if (!(ms->grid_expand_count & 255)) { + fprintf(stderr, "Maximal slicing stats:\n"); + + ms_solver_print_stats(ms->solver); + fprintf(stderr, + "%lu evals: total time %g s, avg time per call %g ms\n", + ms->grid_expand_count, (double)ms->grid_expand_time / 1e6, + (double)ms->grid_expand_time / ms->grid_expand_count / 1e3); + + } } |