diff options
Diffstat (limited to 'src/qms.c')
-rw-r--r-- | src/qms.c | 899 |
1 files changed, 171 insertions, 728 deletions
@@ -1,3 +1,5 @@ +#include "common.h" + #include <ctype.h> #include <errno.h> #include <float.h> @@ -9,10 +11,6 @@ #include <string.h> #include <cblas.h> -#include <lapacke.h> - -#include <cl.h> -#include <clBLAS.h> #include "cctk.h" #include "cctk_Arguments.h" @@ -21,588 +19,19 @@ #include "util_Table.h" #include "qms.h" - -#define ACC_TEST 0 +#include "qms_solve.h" double scale_factor; -/* mapping between our indices and thorn names */ -static const char *metric_vars[] = { -#if CCZ4 - [GTXX] = "ML_CCZ4::gt11", - [GTYY] = "ML_CCZ4::gt22", - [GTZZ] = "ML_CCZ4::gt33", - [GTXY] = "ML_CCZ4::gt12", - [GTXZ] = "ML_CCZ4::gt13", - [GTYZ] = "ML_CCZ4::gt23", - [ATXX] = "ML_CCZ4::At11", - [ATYY] = "ML_CCZ4::At22", - [ATZZ] = "ML_CCZ4::At33", - [ATXY] = "ML_CCZ4::At12", - [ATXZ] = "ML_CCZ4::At13", - [ATYZ] = "ML_CCZ4::At23", - [PHI] = "ML_CCZ4::phi", - [K] = "ML_CCZ4::trK", - [XTX] = "ML_CCZ4::Xt1", - [XTY] = "ML_CCZ4::Xt2", - [XTZ] = "ML_CCZ4::Xt3", - [BETAX] = "ML_CCZ4::beta1", - [BETAY] = "ML_CCZ4::beta2", - [BETAZ] = "ML_CCZ4::beta3", - [ALPHA] = "ML_CCZ4::alpha", - [KDOT_XX] = "ML_CCZ4::Kdot11", - [KDOT_YY] = "ML_CCZ4::Kdot22", - [KDOT_ZZ] = "ML_CCZ4::Kdot33", - [KDOT_XY] = "ML_CCZ4::Kdot12", - [KDOT_XZ] = "ML_CCZ4::Kdot13", - [KDOT_YZ] = "ML_CCZ4::Kdot23", - [XTDOT_X] = "ML_CCZ4::Xtdot1", - [XTDOT_Y] = "ML_CCZ4::Xtdot2", - [XTDOT_Z] = "ML_CCZ4::Xtdot3", - [PHIDOT] = "ML_CCZ4::phidot", -#else - [GTXX] = "ML_BSSN::gt11", - [GTYY] = "ML_BSSN::gt22", - [GTZZ] = "ML_BSSN::gt33", - [GTXY] = "ML_BSSN::gt12", - [GTXZ] = "ML_BSSN::gt13", - [GTYZ] = "ML_BSSN::gt23", - [ATXX] = "ML_BSSN::At11", - [ATYY] = "ML_BSSN::At22", - [ATZZ] = "ML_BSSN::At33", - [ATXY] = "ML_BSSN::At12", - [ATXZ] = "ML_BSSN::At13", - [ATYZ] = "ML_BSSN::At23", - [PHI] = "ML_BSSN::phi", - [K] = "ML_BSSN::trK", - [XTX] = "ML_BSSN::Xt1", - [XTY] = "ML_BSSN::Xt2", - [XTZ] = "ML_BSSN::Xt3", - [BETAX] = "ML_BSSN::beta1", - [BETAY] = "ML_BSSN::beta2", - [BETAZ] = "ML_BSSN::beta3", - [ALPHA] = "ML_BSSN::alpha", - //[ALPHA] = "ADMBase::alp", - [KDOT_XX] = "ML_BSSN::Kdot11", - [KDOT_YY] = "ML_BSSN::Kdot22", - [KDOT_ZZ] = "ML_BSSN::Kdot33", - [KDOT_XY] = "ML_BSSN::Kdot12", - [KDOT_XZ] = "ML_BSSN::Kdot13", - [KDOT_YZ] = "ML_BSSN::Kdot23", - [XTDOT_X] = "ML_BSSN::Xtdot1", - [XTDOT_Y] = "ML_BSSN::Xtdot2", - [XTDOT_Z] = "ML_BSSN::Xtdot3", - [PHIDOT] = "ML_BSSN::phidot", -#endif -}; - -/* mapping between the cactus grid values and interpolated values */ -static const CCTK_INT interp_operation_indices[] = { - [I_GTXX] = GTXX, - [I_GTYY] = GTYY, - [I_GTZZ] = GTZZ, - [I_GTXY] = GTXY, - [I_GTXZ] = GTXZ, - [I_GTYZ] = GTYZ, - [I_PHI] = PHI, - [I_PHI_DX] = PHI, - [I_PHI_DY] = PHI, - [I_PHI_DZ] = PHI, - [I_ATXX] = ATXX, - [I_ATYY] = ATYY, - [I_ATZZ] = ATZZ, - [I_ATXY] = ATXY, - [I_ATXZ] = ATXZ, - [I_ATYZ] = ATYZ, - [I_K] = K, - [I_K_DX] = K, - [I_K_DY] = K, - [I_K_DZ] = K, - [I_XTX] = XTX, - [I_XTY] = XTY, - [I_XTZ] = XTZ, - [I_BETAX] = BETAX, - [I_BETAY] = BETAY, - [I_BETAZ] = BETAZ, - [I_ALPHA] = ALPHA, - [I_ALPHA_DX] = ALPHA, - [I_ALPHA_DY] = ALPHA, - [I_ALPHA_DZ] = ALPHA, - [I_ALPHA_DXX] = ALPHA, - [I_ALPHA_DYY] = ALPHA, - [I_ALPHA_DZZ] = ALPHA, - [I_ALPHA_DXY] = ALPHA, - [I_ALPHA_DXZ] = ALPHA, - [I_ALPHA_DYZ] = ALPHA, - [I_KDOT_XX] = KDOT_XX, - [I_KDOT_YY] = KDOT_YY, - [I_KDOT_ZZ] = KDOT_ZZ, - [I_KDOT_XY] = KDOT_XY, - [I_KDOT_XZ] = KDOT_XZ, - [I_KDOT_YZ] = KDOT_YZ, - [I_XTDOT_X] = XTDOT_X, - [I_XTDOT_Y] = XTDOT_Y, - [I_XTDOT_Z] = XTDOT_Z, - [I_PHIDOT] = PHIDOT, - [I_PHIDOT_DX] = PHIDOT, - [I_PHIDOT_DY] = PHIDOT, - [I_PHIDOT_DZ] = PHIDOT, -}; - -/* the operation (plain value or x/y/z-derivative) to apply during interpolation */ -static const CCTK_INT interp_operation_codes[] = { - [I_GTXX] = 0, - [I_GTYY] = 0, - [I_GTZZ] = 0, - [I_GTXY] = 0, - [I_GTXZ] = 0, - [I_GTYZ] = 0, - [I_PHI] = 0, - [I_PHI_DX] = 1, - [I_PHI_DY] = 2, - [I_PHI_DZ] = 3, - [I_ATXX] = 0, - [I_ATYY] = 0, - [I_ATZZ] = 0, - [I_ATXY] = 0, - [I_ATXZ] = 0, - [I_ATYZ] = 0, - [I_K] = 0, - [I_K_DX] = 1, - [I_K_DY] = 2, - [I_K_DZ] = 3, - [I_XTX] = 0, - [I_XTY] = 0, - [I_XTZ] = 0, - [I_BETAX] = 0, - [I_BETAY] = 0, - [I_BETAZ] = 0, - [I_ALPHA] = 0, - [I_ALPHA_DX] = 1, - [I_ALPHA_DY] = 2, - [I_ALPHA_DZ] = 3, - [I_ALPHA_DXX] = 11, - [I_ALPHA_DYY] = 22, - [I_ALPHA_DZZ] = 33, - [I_ALPHA_DXY] = 12, - [I_ALPHA_DXZ] = 13, - [I_ALPHA_DYZ] = 23, - [I_KDOT_XX] = 0, - [I_KDOT_YY] = 0, - [I_KDOT_ZZ] = 0, - [I_KDOT_XY] = 0, - [I_KDOT_XZ] = 0, - [I_KDOT_YZ] = 0, - [I_XTDOT_X] = 0, - [I_XTDOT_Y] = 0, - [I_XTDOT_Z] = 0, - [I_PHIDOT] = 0, - [I_PHIDOT_DX] = 1, - [I_PHIDOT_DY] = 2, - [I_PHIDOT_DZ] = 3, -}; - -static void init_opencl(MaximalSlicingContext *ms) -{ - int err, count; - cl_platform_id platform; - cl_context_properties props[3]; - - err = clGetPlatformIDs(1, &platform, &count); - if (err != CL_SUCCESS || count < 1) { - fprintf(stderr, "Could not get an OpenCL platform ID\n"); - return; - } - - err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ms->ocl_device, &count); - if (err != CL_SUCCESS || count < 1) { - fprintf(stderr, "Could not get an OpenCL device ID\n"); - return; - } - - props[0] = CL_CONTEXT_PLATFORM; - props[1] = (cl_context_properties)platform; - props[2] = 0; - - ms->cl_ctx = clCreateContext(props, 1, &ms->ocl_device, NULL, NULL, &err); - if (err != CL_SUCCESS || !ms->cl_ctx) { - fprintf(stderr, "Could not create an OpenCL context\n"); - return; - } - - ms->cl_queue = clCreateCommandQueue(ms->cl_ctx, ms->ocl_device, 0, &err); - if (err != CL_SUCCESS || !ms->cl_queue) { - fprintf(stderr, "Could not create an OpenCL command queue: %d\n", err); - goto fail; - } - - err = clblasSetup(); - if (err != CL_SUCCESS) { - fprintf(stderr, "Error setting up clBLAS\n"); - goto fail; - } - - ms->ocl_coeffs = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - - ms->bicgstab.cl_p = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_v = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_y = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_z = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_t = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_res = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_res0 = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_tmp = clCreateBuffer(ms->cl_ctx, 0, ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_tmp1 = clCreateBuffer(ms->cl_ctx, 0, 2 * ms->nb_coeffs * sizeof(double), NULL, &err); - - ms->bicgstab.cl_k = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err); - ms->bicgstab.cl_mat = clCreateBuffer(ms->cl_ctx, 0, ms->nb_colloc_points * ms->nb_coeffs * sizeof(double), NULL, &err); - - ms->bicgstab.cl_rho = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - ms->bicgstab.cl_alpha = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - ms->bicgstab.cl_beta = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - ms->bicgstab.cl_omega = clCreateBuffer(ms->cl_ctx, 0, 2 * sizeof(double), NULL, &err); - ms->bicgstab.cl_omega1 = clCreateBuffer(ms->cl_ctx, 0, sizeof(double), NULL, &err); - - return; -fail: - if (ms->cl_queue) - clReleaseCommandQueue(ms->cl_queue); - ms->cl_queue = 0; - - if (ms->cl_ctx) - clReleaseContext(ms->cl_ctx); - ms->cl_ctx = 0; -} - -static void construct_filter_matrix(MaximalSlicingContext *ms, double filter_power) +/* get an approximate "main" frequency component in a basis function */ +static double calc_basis_freq(const BasisSet *b, int order) { - char equed = 'N'; - double cond, ferr, berr, rpivot; - - double *m, *minv, *scale, *tmp; - int *ipiv; - int idx_coeff_x, idx_coeff_z, idx_grid_x, idx_grid_z; - int N = ms->nb_coeffs; - - ms->input_filter = malloc(sizeof(*m) * N * N); - - m = malloc(sizeof(*m) * N * N); - minv = malloc(sizeof(*m) * N * N); - scale = malloc(sizeof(*m) * N * N); - tmp = malloc(sizeof(*m) * N * N); - ipiv = malloc(sizeof(*ipiv) * N); - -#define BASIS_X (ms->basis_x_val [idx_grid_x * ms->nb_coeffs_x + idx_coeff_x]) -#define BASIS_Z (ms->basis_z_val [idx_grid_z * ms->nb_coeffs_z + idx_coeff_z]) - for (idx_grid_z = 0; idx_grid_z < ms->nb_colloc_points_z - 0; idx_grid_z++) { - for (idx_grid_x = 0; idx_grid_x < ms->nb_colloc_points_x - 0; idx_grid_x++) { - int idx_grid = idx_grid_z * ms->nb_colloc_points_x + idx_grid_x; - - for (idx_coeff_z = 0; idx_coeff_z < ms->nb_coeffs_z; idx_coeff_z++) - for (idx_coeff_x = 0; idx_coeff_x < ms->nb_coeffs_x; idx_coeff_x++) { - const int idx_coeff = idx_coeff_z * ms->nb_coeffs_x + idx_coeff_x; - - minv[idx_grid + ms->nb_colloc_points * idx_coeff] = BASIS_X * BASIS_Z; - scale[idx_grid + ms->nb_colloc_points * idx_coeff] = (idx_grid == idx_coeff) ? - exp(-36.0 * pow((double)idx_grid_x / ms->nb_coeffs_x, filter_power)) * - exp(-36.0 * pow((double)idx_grid_z / ms->nb_coeffs_z, filter_power)) : 0.0; - - scale[idx_grid + ms->nb_colloc_points * idx_coeff] = (idx_grid == idx_coeff) ? 1.0 : 0.0; - //if (idx_coeff_z == idx_grid_z && idx_coeff_z == 0 && idx_grid_x == idx_coeff_x) - // fprintf(stderr, "%d %g\n", idx_coeff_x, scale[idx_grid + ms->nb_colloc_points * idx_coeff]); - } - } - } - - memcpy(m, minv, sizeof(*m) * N * N); - LAPACKE_dgetrf(LAPACK_COL_MAJOR, N, N, m, N, ipiv); - LAPACKE_dgetri(LAPACK_COL_MAJOR, N, m, N, ipiv); - - cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - N, N, N, 1.0, scale, N, m, N, 0.0, tmp, N); - cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - N, N, N, 1.0, minv, N, tmp, N, 0.0, ms->input_filter, N); - - free(m); - free(minv); - free(scale); - free(tmp); - free(ipiv); -} - -static MaximalSlicingContext *init_ms(cGH *cctkGH, - int basis_order_r, int basis_order_z, - double sf, double filter_power, double input_filter_power, - CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z, - const int grid_size[3]) -{ - MaximalSlicingContext *ms; - int ret; - - ms = calloc(1, sizeof(*ms)); - - ms->gh = cctkGH; - - ms->basis = &qms_sb_even_basis; - //ms->basis = &qms_cheb_basis; - //ms->basis = &qms_cheb_even_basis; - //ms->basis = &qms_tl_basis; -#if POLAR - ms->basis1 = &qms_cos_even_basis; -#else - ms->basis1 = &qms_sb_even_basis; -#endif - - ms->nb_coeffs_x = basis_order_r; - ms->nb_coeffs_z = basis_order_z; - - ms->nb_coeffs = ms->nb_coeffs_x * ms->nb_coeffs_z; - - ms->nb_colloc_points_x = basis_order_r; - ms->nb_colloc_points_z = basis_order_z; - - ms->nb_colloc_points = ms->nb_colloc_points_x * ms->nb_colloc_points_z; - - if (ms->nb_colloc_points != ms->nb_coeffs) - CCTK_WARN(0, "Non-square collocation matrix"); - - ms->colloc_grid_order_x = ms->nb_colloc_points_x; - ms->colloc_grid_order_z = ms->nb_colloc_points_z; - - ms->mat = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points); - ms->coeffs = malloc(sizeof(double) * ms->nb_coeffs); - ms->rhs = malloc(sizeof(double) * ms->nb_colloc_points); - - ms->coeffs_eval = malloc(sizeof(double) * ms->nb_coeffs); - for (int i = 0; i < ARRAY_ELEMS(ms->solution_cache); i++) { - ms->solution_cache[i].coeffs = malloc(sizeof(double) * ms->nb_coeffs); - if (!ms->solution_cache[i].coeffs) - CCTK_WARN(0, "malloc failure"); - } - - ms->mat_f = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points); - ms->ipiv = malloc(sizeof(*ms->ipiv) * ms->nb_coeffs); - -#if 1 - scale_factor = 1.0; - - //scale_factor = (x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] * 0.75) / ms->basis->colloc_point(ms->colloc_grid_order_x, ms->nb_colloc_points_x - 1); - scale_factor = (64.0 / ms->basis->colloc_point(ms->colloc_grid_order_x, ms->nb_colloc_points_x - 1)); - //scale_factor = (x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)]); - //scale_factor = x[CCTK_GFINDEX3D(cctkGH, grid_size[0] - 1, 0, 0)] - 0.5; - fprintf(stderr, "scale factor %16.16g\n", scale_factor); - -#else - scale_factor = sf; -#endif - - /* initialize the collocation grid */ - posix_memalign((void**)&ms->colloc_grid[0], 32, ms->nb_colloc_points_x * sizeof(*ms->colloc_grid[0])); - posix_memalign((void**)&ms->colloc_grid[1], 32, ms->nb_colloc_points_z * sizeof(*ms->colloc_grid[1])); - - for (int i = 0; i < ms->nb_colloc_points_x; i++) { - ms->colloc_grid[0][i] = ms->basis->colloc_point(ms->colloc_grid_order_x, i); - fprintf(stderr, "%d %g\n", i, ms->colloc_grid[0][i]); - } - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - ms->colloc_grid[1][i] = ms->basis1->colloc_point(ms->colloc_grid_order_z, i); - fprintf(stderr, "%d %g\n", i, ms->colloc_grid[1][i] / (POLAR ? M_PI : 1.0)); - } - - /* precompute the basis values we will need */ - ms->basis_x_val = malloc(sizeof(*ms->basis_x_val) * ms->nb_colloc_points_x * ms->nb_coeffs_x); - ms->basis_x_dval = malloc(sizeof(*ms->basis_x_dval) * ms->nb_colloc_points_x * ms->nb_coeffs_x); - ms->basis_x_d2val = malloc(sizeof(*ms->basis_x_d2val) * ms->nb_colloc_points_x * ms->nb_coeffs_x); - for (int i = 0; i < ms->nb_colloc_points_x; i++) { - CCTK_REAL coord = ms->colloc_grid[0][i]; - for (int j = 0; j < ms->nb_coeffs_x; j++) { - ms->basis_x_val [i * ms->nb_coeffs_x + j] = ms->basis->eval(coord, j); - ms->basis_x_dval [i * ms->nb_coeffs_x + j] = ms->basis->eval_diff1(coord, j); - ms->basis_x_d2val[i * ms->nb_coeffs_x + j] = ms->basis->eval_diff2(coord, j); - } - } - - ms->basis_z_val = malloc(sizeof(*ms->basis_z_val) * ms->nb_colloc_points_z * ms->nb_coeffs_z); - ms->basis_z_dval = malloc(sizeof(*ms->basis_z_dval) * ms->nb_colloc_points_z * ms->nb_coeffs_z); - ms->basis_z_d2val = malloc(sizeof(*ms->basis_z_d2val) * ms->nb_colloc_points_z * ms->nb_coeffs_z); - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - CCTK_REAL coord = ms->colloc_grid[1][i]; - for (int j = 0; j < ms->nb_coeffs_z; j++) { - ms->basis_z_val [i * ms->nb_coeffs_z + j] = ms->basis1->eval(coord, j); - ms->basis_z_dval [i * ms->nb_coeffs_z + j] = ms->basis1->eval_diff1(coord, j); - ms->basis_z_d2val[i * ms->nb_coeffs_z + j] = ms->basis1->eval_diff2(coord, j); - } - } - - posix_memalign((void**)&ms->basis_val_00, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00)); - posix_memalign((void**)&ms->basis_val_11, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00)); - posix_memalign((void**)&ms->basis_val_10, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00)); - posix_memalign((void**)&ms->basis_val_01, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00)); - posix_memalign((void**)&ms->basis_val_02, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00)); - posix_memalign((void**)&ms->basis_val_20, 32, ms->nb_colloc_points * ms->nb_coeffs * sizeof(*ms->basis_val_00)); - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - const double *basis_z = ms->basis_z_val + i * ms->nb_coeffs_z; - const double *dbasis_z = ms->basis_z_dval + i * ms->nb_coeffs_z; - const double *d2basis_z = ms->basis_z_d2val + i * ms->nb_coeffs_z; - - for (int j = 0; j < ms->nb_colloc_points_x; j++) { - const double *basis_x = ms->basis_x_val + j * ms->nb_coeffs_x; - const double *dbasis_x = ms->basis_x_dval + j * ms->nb_coeffs_x; - const double *d2basis_x = ms->basis_x_d2val + j * ms->nb_coeffs_x; - const int idx_grid = i * ms->nb_colloc_points_x + j; - -#if POLAR - double r = ms->colloc_grid[0][j]; - double theta = ms->colloc_grid[1][i]; - - double x = r * cos(theta); - double z = r * sin(theta); -#else - double x = ms->colloc_grid[0][j]; - double z = ms->colloc_grid[1][i]; -#endif - - for (int k = 0; k < ms->nb_coeffs_z; k++) - for (int l = 0; l < ms->nb_coeffs_x; l++) { - const int idx_coeff = k * ms->nb_coeffs_x + l; - const int idx = idx_grid + ms->nb_colloc_points * idx_coeff; - ms->basis_val_00[idx] = basis_x[l] * basis_z[k]; -#if POLAR - ms->basis_val_10[idx] = ((r > EPS) ? (dbasis_x[l] * basis_z[k] * x / r - basis_x[l] * dbasis_z[k] * z / SQR(r)) : 0.0); - ms->basis_val_01[idx] = ((r > EPS) ? (dbasis_x[l] * basis_z[k] * z / r + basis_x[l] * dbasis_z[k] * x / SQR(r)) : 0.0); - ms->basis_val_20[idx] = ((r > EPS) ? (SQR(x / r) * d2basis_x[l] * basis_z[k] + SQR(z / SQR(r)) * basis_x[l] * d2basis_z[k] - + (1.0 - SQR(x / r)) / r * dbasis_x[l] * basis_z[k] - + 2 * x * z / SQR(SQR(r)) * basis_x[l] * dbasis_z[k] - - 2 * z * x / (r * SQR(r)) * dbasis_x[l] * dbasis_z[k]) : 0.0); - ms->basis_val_02[idx] = ((r > EPS) ? (SQR(z / r) * d2basis_x[l] * basis_z[k] + SQR(x / SQR(r)) * basis_x[l] * d2basis_z[k] - + (1.0 - SQR(z / r)) / r * dbasis_x[l] * basis_z[k] - - 2 * x * z / SQR(SQR(r)) * basis_x[l] * dbasis_z[k] - + 2 * z * x / (r * SQR(r)) * dbasis_x[l] * dbasis_z[k]) : 0.0); - ms->basis_val_11[idx] = ((r > EPS) ? (x * z / SQR(r) * d2basis_x[l] * basis_z[k] - x * z / SQR(SQR(r)) * basis_x[l] * d2basis_z[k] - - x * z / (r * SQR(r)) * dbasis_x[l] * basis_z[k] - - (1.0 - SQR(z / r)) / SQR(r) * basis_x[l] * dbasis_z[k] - + (SQR(x) - SQR(z)) / (r * SQR(r)) * dbasis_x[l] * dbasis_z[k]) : 0.0); -#else - ms->basis_val_10[idx] = dbasis_x[l] * basis_z[k]; - ms->basis_val_01[idx] = basis_x[l] * dbasis_z[k]; - ms->basis_val_20[idx] = d2basis_x[l] * basis_z[k]; - ms->basis_val_02[idx] = basis_x[l] * d2basis_z[k]; - ms->basis_val_11[idx] = dbasis_x[l] * dbasis_z[k]; -#endif - } - } - } - - posix_memalign((void**)&ms->eq_coeff_00, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00)); - posix_memalign((void**)&ms->eq_coeff_11, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00)); - posix_memalign((void**)&ms->eq_coeff_10, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00)); - posix_memalign((void**)&ms->eq_coeff_01, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00)); - posix_memalign((void**)&ms->eq_coeff_02, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00)); - posix_memalign((void**)&ms->eq_coeff_20, 32, ms->nb_colloc_points * sizeof(*ms->eq_coeff_00)); - - ms->interp_coords[0] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[0])); - ms->interp_coords[1] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[1])); - ms->interp_coords[2] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_coords[2])); - for (int i = 0; i < ms->nb_colloc_points_z; i++) { - for (int j = 0; j < ms->nb_colloc_points_x; j++) { -#if POLAR - double phi = ms->colloc_grid[1][i]; - double r = ms->colloc_grid[0][j]; - - double x = r * cos(phi); - double z = r * sin(phi); -#else - double x = ms->colloc_grid[0][j]; - double z = ms->colloc_grid[1][i]; -#endif - - ms->interp_coords[0][i * ms->nb_colloc_points_x + j] = x; - ms->interp_coords[1][i * ms->nb_colloc_points_x + j] = 0; - ms->interp_coords[2][i * ms->nb_colloc_points_x + j] = z; - } - } - - for (int i = 0; i < ARRAY_ELEMS(ms->metric_u); i++) - ms->metric_u[i] = malloc(ms->nb_colloc_points * sizeof(*ms->interp_values[i])); - - ms->kij_kij = malloc(ms->nb_colloc_points * sizeof(*ms->kij_kij)); - - ms->coeff_scale = malloc(ms->nb_coeffs * sizeof(double)); - for (int j = 0; j < ms->nb_coeffs_z; j++) - for (int i = 0; i < ms->nb_coeffs_x; i++) { - //ms->coeff_scale[j * ms->nb_coeffs_x + i] = 1.0; - ms->coeff_scale[j * ms->nb_coeffs_x + i] = exp(-36.0 * pow((double)i / ms->nb_coeffs_x, filter_power)) * - exp(-36.0 * pow((double)j / ms->nb_coeffs_z, filter_power)); - //ms->coeff_scale[j * ms->nb_coeffs_x + i] = ((i < (2.0 / 3.0) * ms->nb_coeffs_x) ? 1.0 : SQR(cos((((double)i / ms->nb_coeffs_x) - (2.0 / 3.0)) * 3.0 * M_PI / 2.0))) * - // ((j < (2.0 / 3.0) * ms->nb_coeffs_z) ? 1.0 : SQR(cos((((double)j / ms->nb_coeffs_z) - (2.0 / 3.0)) * 3.0 * M_PI / 2.0))); - } - - for (int i = 0; i < ARRAY_ELEMS(ms->interp_values); i++) { - ms->interp_values[i] = malloc(sizeof(*ms->interp_values[i]) * ms->nb_colloc_points); - ms->interp_values_prefilter[i] = malloc(sizeof(*ms->interp_values[i]) * ms->nb_colloc_points); - if (!ms->interp_values[i] || !ms->interp_values_prefilter[i]) - CCTK_WARN(0, "Malloc failure"); - ms->interp_value_codes[i] = CCTK_VARIABLE_REAL; - } - - for (int i = 0; i < ARRAY_ELEMS(metric_vars); i++) { - ms->interp_vars_indices[i] = CCTK_VarIndex(metric_vars[i]); - if (ms->interp_vars_indices[i] < 0) - CCTK_VWarn(0, __LINE__, __FILE__, CCTK_THORNSTRING, "Error getting the index of variable: %s\n", metric_vars[i]); - } - - ms->coord_system = CCTK_CoordSystemHandle("cart3d"); - if (ms->coord_system < 0) - CCTK_WARN(0, "Error getting the coordinate system"); - - ms->interp_operator = CCTK_InterpHandle("Lagrange polynomial interpolation (tensor product)"); - if (ms->interp_operator < 0) - CCTK_WARN(0, "Error getting the interpolation operator"); - - ms->interp_params = Util_TableCreateFromString("order=4 want_global_mode=1"); - if (ms->interp_params < 0) - CCTK_WARN(0, "Error creating interpolation parameters table"); - - ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS, - interp_operation_codes, "operation_codes"); - if (ret < 0) - CCTK_WARN(0, "Error setting operation codes"); - - ret = Util_TableSetIntArray(ms->interp_params, NB_INTERP_VARS, - interp_operation_indices, "operand_indices"); - if (ret < 0) - CCTK_WARN(0, "Error setting operand indices"); - - ms->bicgstab.p = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.v = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.y = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.z = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.t = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.res = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.res0 = malloc(sizeof(double) * ms->nb_coeffs); - ms->bicgstab.k = malloc(sizeof(double) * ms->nb_coeffs * ms->nb_colloc_points); - - ms->steps_since_inverse = INT_MAX; - - init_opencl(ms); - - construct_filter_matrix(ms, input_filter_power); - - CCTK_TimerCreate("MaximalSlicingAxi_Solve"); - CCTK_TimerCreate("MaximalSlicingAxi_Expand"); - CCTK_TimerCreate("MaximalSlicingAxi_interp_geometry"); - CCTK_TimerCreate("MaximalSlicingAxi_calc_eq_coeffs"); - CCTK_TimerCreate("MaximalSlicingAxi_construct_matrix"); - CCTK_TimerCreate("MaximalSlicingAxi_filter_input"); - CCTK_TimerCreate("MaximalSlicingAxi_solve_LU"); - CCTK_TimerCreate("MaximalSlicingAxi_solve_BiCGSTAB"); - CCTK_TimerCreate("MaximalSlicingAxi_Polish"); - - return ms; + return b->colloc_point(order, 1); } -static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, - CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z) +static CoordPatch *get_coord_patch(QMSContext *ms, + CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z, + double scale_factor, double scale_power) { cGH *cctkGH = ms->gh; @@ -645,48 +74,9 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, if (i == cp->size[1]) CCTK_WARN(0, "The grid does not include y==0"); -#if 0 - posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]); - for (int j = 0; j < ms->gh->cctk_lsh[1]; j++) - for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { - CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; - CCTK_REAL yy = y[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; - CCTK_REAL r = sqrt(SQR(xx) + SQR(yy)); - - for (int k = 0; k < ms->nb_coeffs_x; k++) - //cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) * ms->nb_coeffs_x + k] = ms->basis->eval(r, k); - cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) + ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0] * k] = ms->basis->eval(r, k); - } - - posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]); - for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) { - CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)]; - for (int j = 0; j < ms->nb_coeffs_z; j++) - cp->basis_val_z [i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j); - //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j); - } - posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->nb_coeffs_x); - posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size); - for (int i = 0; i < grid_size; i++) - cp->one[i] = 1.0; -#else - posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->nb_coeffs_x * ms->gh->cctk_lsh[0]); - for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { - CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; - - for (int k = 0; k < ms->nb_coeffs_x; k++) - cp->basis_val_r[i * ms->nb_coeffs_x + k] = ms->basis->eval(fabs(xx), k); - } - - posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->nb_coeffs_z * ms->gh->cctk_lsh[2]); - for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) { - CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)]; - for (int j = 0; j < ms->nb_coeffs_z; j++) - cp->basis_val_z[i * ms->nb_coeffs_z + j] = ms->basis->eval(fabs(zz), j); - } - - posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * ms->nb_coeffs_x * cp->size[0] * cp->size[2]); - posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * ms->nb_coeffs_z * cp->size[0] * cp->size[2]); +#if QMS_POLAR || 1 + posix_memalign((void**)&cp->transform_matrix, 32, sizeof(*cp->transform_matrix) * ms->solver->nb_coeffs[0] * cp->size[0] * cp->size[2]); + posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * ms->solver->nb_coeffs[1] * cp->size[0] * cp->size[2]); #pragma omp parallel for for (int j = 0; j < cp->size[2]; j++) { CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)]; @@ -694,10 +84,11 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, for (int i = 0; i < cp->size[0]; i++) { const int idx_grid = j * cp->size[0] + i; - CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; + double xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; + double rr = sqrt(SQR(xx) + SQR(zz)); -#if POLAR - double coord0 = sqrt(SQR(xx) + SQR(zz)); +#if QMS_POLAR + double coord0 = rr; double coord1 = atan2(zz, xx); #else double coord0 = xx; @@ -709,51 +100,88 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, // const int idx_coeff = k * ms->nb_coeffs_x + l; // cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * idx_coeff] = ms->basis->eval(r, l) * ms->basis1->eval(phi, k); // } - for (int k = 0; k < ms->nb_coeffs_x; k++) - cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->basis->eval(coord0, k); - for (int k = 0; k < ms->nb_coeffs_z; k++) - cp->transform_matrix1[idx_grid * ms->nb_coeffs_z + k] = ms->basis1->eval(coord1, k); + for (int k = 0; k < ms->solver->nb_coeffs[0]; k++) { + double dx = calc_basis_freq(ms->solver->basis[0], k); + double r0 = dx * scale_factor; + double fact = exp(-36.0 * pow(rr / r0, scale_power)); + + cp->transform_matrix[idx_grid + cp->size[0] * cp->size[2] * k] = ms->solver->basis[0]->eval(coord0, k) * fact; + } + for (int k = 0; k < ms->solver->nb_coeffs[1]; k++) { + double dx = calc_basis_freq(ms->solver->basis[1], k); + double r0 = dx * scale_factor; + double fact = exp(-36.0 * pow(rr / r0, scale_power)); + + cp->transform_matrix1[idx_grid * ms->solver->nb_coeffs[1] + k] = ms->solver->basis[1]->eval(coord1, k) * fact; + } } } - posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * ms->nb_coeffs_z); + posix_memalign((void**)&cp->transform_tmp, 32, sizeof(*cp->transform_tmp) * cp->size[0] * cp->size[2] * ms->solver->nb_coeffs[1]); +#else + posix_memalign((void**)&cp->basis_val_r, 32, sizeof(*cp->basis_val_r) * ms->solver->nb_coeffs[0] * ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0]); + for (int j = 0; j < ms->gh->cctk_lsh[1]; j++) + for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { + CCTK_REAL xx = x[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; + CCTK_REAL yy = y[CCTK_GFINDEX3D(ms->gh, i, j, 0)]; + CCTK_REAL r = sqrt(SQR(xx) + SQR(yy)); + + for (int k = 0; k < ms->solver->nb_coeffs[0]; k++) + //cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) * ms->nb_coeffs_x + k] = ms->basis->eval(r, k); + cp->basis_val_r [(j * ms->gh->cctk_lsh[0] + i) + ms->gh->cctk_lsh[1] * ms->gh->cctk_lsh[0] * k] = ms->solver->basis[0]->eval(r, k); + } + + posix_memalign((void**)&cp->basis_val_z, 32, sizeof(*cp->basis_val_z) * ms->solver->nb_coeffs[1] * ms->gh->cctk_lsh[2]); + for (int i = 0; i < ms->gh->cctk_lsh[2]; i++) { + CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, i)]; + for (int j = 0; j < ms->solver->nb_coeffs[1]; j++) + cp->basis_val_z [i * ms->solver->nb_coeffs[1] + j] = ms->solver->basis[0]->eval(fabs(zz), j); + //cp->basis_val_z [i + ms->gh->cctk_lsh[2] * j] = ms->basis->eval(zz, j); + } + posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->solver->nb_coeffs[0]); + posix_memalign((void**)&cp->one, 32, sizeof(*cp->one) * grid_size); + for (int i = 0; i < grid_size; i++) + cp->one[i] = 1.0; + + posix_memalign((void**)&cp->w_scale, 32, sizeof(*cp->w_scale) * grid_size); + for (int k = 0; k < ms->gh->cctk_lsh[2]; k++) + for (int j = 0; j < ms->gh->cctk_lsh[1]; j++) + for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { + int idx = CCTK_GFINDEX3D(ms->gh, i, j, k); + double r = sqrt(SQR(x[idx]) + SQR(y[idx]) + SQR(z[idx])); + const double R = 32.0; + const double width = 4.0; + cp->w_scale[idx] = (r > R) ? exp(-pow((r - R) / width, 4.0)) : 1.0; + } #endif ms->nb_patches++; return cp; } -static MaximalSlicingContext *ms_context; +static QMSContext *qms_context; void quasimaximal_slicing_axi_solve(CCTK_ARGUMENTS) { - MaximalSlicingContext *ms; + QMSContext *ms; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; double time; - if (!ms_context) { - ms_context = init_ms(cctkGH, basis_order_r, basis_order_z, - scale_factor, filter_power, input_filter_power, x, y, z, cctk_lsh); - } - ms = ms_context; + ms = qms_context; time = cctkGH->cctk_time / ms->gh->cctk_delta_time; - if (ms->gh->cctk_levfac[0] != 1 || - fabs(time - ceilf(time)) > 1e-8) + if (ms->gh->cctk_levfac[0] != 1 || fabs(time - ceilf(time)) > 1e-8 || + (ms->nb_solutions && ms->solution_cache[ms->nb_solutions - 1].time == cctkGH->cctk_time)) return; - fprintf(stderr, "qms solve: time %g %g\n", ms->gh->cctk_time, time); - - CCTK_TimerStart("MaximalSlicingAxi_Solve"); - qms_maximal_solve(ms); - CCTK_TimerStop("MaximalSlicingAxi_Solve"); - - if (export_coeffs) - memcpy(w_coeffs, ms->coeffs, sizeof(*w_coeffs) * ms->nb_coeffs); + CCTK_TimerStart("QuasiMaximalSlicing_Solve"); + qms_solver_solve(ms->solver); + CCTK_TimerStop("QuasiMaximalSlicing_Solve"); + fprintf(stderr, "%d qms solve: time %g %g %g\n", ms->gh->cctk_levfac[0], ms->gh->cctk_time, time, ms->solver->coeffs[0]); if (1) { double *tmp; if (ms->nb_solutions == ARRAY_ELEMS(ms->solution_cache)) { @@ -763,45 +191,39 @@ void quasimaximal_slicing_axi_solve(CCTK_ARGUMENTS) ms->nb_solutions++; tmp = ms->solution_cache[ms->nb_solutions - 1].coeffs; } - ms->solution_cache[ms->nb_solutions - 1].coeffs = ms->coeffs; + ms->solution_cache[ms->nb_solutions - 1].coeffs = ms->solver->coeffs; ms->solution_cache[ms->nb_solutions - 1].time = ms->gh->cctk_time; - ms->coeffs = tmp; + ms->solver->coeffs = tmp; } } void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS) { - MaximalSlicingContext *ms; + QMSContext *ms; CoordPatch *cp; DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; - int64_t expand_start, totaltime_start; + int64_t expand_start; double *coeffs = NULL; int i, ret; - totaltime_start = gettime(); - - /* on the first run, init the solver */ - if (!ms_context) { - ms_context = init_ms(cctkGH, basis_order_r, basis_order_z, - scale_factor, filter_power, input_filter_power, x, y, z, cctk_lsh); - } - ms = ms_context; + ms = qms_context; - cp = get_coord_patch(ms, x, y, z); + cp = get_coord_patch(ms, x, y, z, scale_factor, scale_power); #if 0 - coeffs = ms->coeffs; + //coeffs = ms->coeffs; + coeffs = ms->solution_cache[ms->nb_solutions - 1].coeffs; #else coeffs = ms->coeffs_eval; - if (ms->nb_solutions < 2) { - memset(coeffs, 0, sizeof(*coeffs) * ms->nb_coeffs); + if (cctkGH->cctk_levfac[0] < 1 || ms->nb_solutions < 2) { + memset(coeffs, 0, sizeof(*coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]); //fprintf(stderr, "qms eval: time %g zero\n", ms->gh->cctk_time); } else { double *coeffs0 = ms->solution_cache[ms->nb_solutions - 2].coeffs; @@ -810,16 +232,28 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS) double time1 = ms->solution_cache[ms->nb_solutions - 1].time; double time = ms->gh->cctk_time; - //fprintf(stderr, "qms eval: time %g interp from %g %g\n", ms->gh->cctk_time, time0, time1); + //double fact; - for (int i = 0; i < ms->nb_coeffs; i++) + //if (time > 2.0) + // fact = 1.0; + //else if (time < 0.1) + // fact = 0.0; + //else + // fact = (1.0 - exp(-pow((time - 0.0) / 0.25, 4.0))); + //fact = 1.0; + + //fprintf(stderr, "qms eval: time %g interp from %g %g %g\n", ms->gh->cctk_time, time0, time1, fact); + + for (int i = 0; i < ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]; i++) coeffs[i] = coeffs1[i] * (time - time0) / (time1 - time0) + coeffs0[i] * (time - time1) / (time0 - time1); } #endif + if (export_coeffs) + memcpy(w_coeffs, coeffs, sizeof(*w_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]); - CCTK_TimerStart("MaximalSlicingAxi_Expand"); + CCTK_TimerStart("QuasiMaximalSlicing_Expand"); expand_start = gettime(); #if 0 #pragma omp parallel for @@ -831,106 +265,115 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS) double r = sqrt(SQR(xx) + SQR(zz)); double phi = atan2(zz, xx); - double val = 1.0; + double val = 0.0; for (int l = 0; l < ms->nb_coeffs_z; l++) { double tmp = 0.0; for (int m = 0; m < ms->nb_coeffs_x; m++) { const int idx_coeff = l * ms->nb_coeffs_x + m; - tmp += ms->coeffs[idx_coeff] * ms->basis->eval(r, m); + tmp += coeffs[idx_coeff] * ms->basis->eval(r, m); } val += tmp * ms->basis1->eval(phi, l); } - alp[idx] = val; + W[idx] = val; } } -#else +#elif QMS_POLAR || 1 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - cctk_lsh[0] * cctk_lsh[2], ms->nb_coeffs_z, ms->nb_coeffs_x, + cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2], - coeffs, ms->nb_coeffs_x, 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]); + coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]); #pragma omp parallel for for (int j = 0; j < cctk_lsh[2]; j++) for (int i = 0; i < cctk_lsh[0]; i++) { const int idx_grid = j * cctk_lsh[0] + i; - const double val = cblas_ddot(ms->nb_coeffs_z, cp->transform_matrix1 + idx_grid * ms->nb_coeffs_z, 1, + const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1, cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]); W[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = val; } -#endif +#else //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); - //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); - //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - // ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0, - // coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z, - // 0.0, cp->transform_z, ms->nb_coeffs_x); - //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0, - // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x, - // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]); + memset(W, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*W)); + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + ms->solver->nb_coeffs[0], cctk_lsh[2], ms->solver->nb_coeffs[1], 1.0, + coeffs, ms->solver->nb_coeffs[0], cp->basis_val_z, ms->solver->nb_coeffs[1], + 0.0, cp->transform_z, ms->solver->nb_coeffs[0]); + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->solver->nb_coeffs[0], 1.0, + cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->solver->nb_coeffs[0], + 1.0, W, cctk_lsh[0] * cctk_lsh[1]); + +// { +// const int grid_size = cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2]; +//#pragma omp parallel for +// for (int i = 0; i < grid_size; i++) +// W[i] *= cp->w_scale[i]; +// } +#endif ms->grid_expand_time += gettime() - expand_start; ms->grid_expand_count++; - CCTK_TimerStop("MaximalSlicingAxi_Expand"); - - ms->solve_time += gettime() - totaltime_start; - ms->solve_count++; + CCTK_TimerStop("QuasiMaximalSlicing_Expand"); /* print stats */ - if (!(ms->solve_count & 255)) { - fprintf(stderr, - "maximal slicing solves: %lu, " - "total time %g s, avg time per call %g ms\n", - ms->solve_count, (double)ms->solve_time / 1e6, - (double)ms->solve_time / ms->solve_count / 1e3); - fprintf(stderr, - "%g%% interpolate geometry: %lu, " - "total time %g s, avg time per call %g ms\n", - (double)ms->interp_geometry_time * 100 / ms->solve_time, - ms->interp_geometry_count, (double)ms->interp_geometry_time / 1e6, - (double)ms->interp_geometry_time / ms->interp_geometry_count / 1e3); - fprintf(stderr, - "%g%% calc equation coefficients: %lu, " - "total time %g s, avg time per call %g ms\n", - (double)ms->calc_eq_coeffs_time * 100 / ms->solve_time, - ms->calc_eq_coeffs_count, (double)ms->calc_eq_coeffs_time / 1e6, - (double)ms->calc_eq_coeffs_time / ms->calc_eq_coeffs_count / 1e3); - fprintf(stderr, - "%g%% pseudospectral matrix construction: %lu, " - "total time %g s, avg time per call %g ms\n", - (double)ms->construct_matrix_time * 100 / ms->solve_time, - ms->construct_matrix_count, (double)ms->construct_matrix_time / 1e6, - (double)ms->construct_matrix_time / ms->construct_matrix_count / 1e3); - fprintf(stderr, - "%g%% BiCGSTAB %lu solves, " - "%lu iterations, total time %g s, " - "avg iterations per solve %g, avg time per solve %g ms, " - "avg time per iteration %g ms\n", - (double)ms->cg_time_total * 100 / ms->solve_time, - ms->cg_solve_count, ms->cg_iter_count, (double)ms->cg_time_total / 1e6, - (double)ms->cg_iter_count / ms->cg_solve_count, - (double)ms->cg_time_total / ms->cg_solve_count / 1e3, - (double)ms->cg_time_total / ms->cg_iter_count / 1e3); - fprintf(stderr, - "%g%% LU %lu solves, total time %g s, avg time per solve %g ms\n", - (double)ms->lu_solves_time * 100 / ms->solve_time, - ms->lu_solves_count, (double)ms->lu_solves_time / 1e6, - (double)ms->lu_solves_time / ms->lu_solves_count / 1e3); + if (!(ms->grid_expand_count & 255)) { + fprintf(stderr, "Quasi-maximal slicing stats:\n"); + + qms_solver_print_stats(ms->solver); + fprintf(stderr, - "%g%% grid expansion: %lu, total time %g s, avg time per call %g ms\n", - (double)ms->grid_expand_time * 100 / ms->solve_time, + "%lu evals: total time %g s, avg time per call %g ms\n", ms->grid_expand_count, (double)ms->grid_expand_time / 1e6, (double)ms->grid_expand_time / ms->grid_expand_count / 1e3); } } +static int context_init(cGH *cctkGH) +{ + QMSContext *qms; + int ret; + + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + qms = calloc(1, sizeof(*qms)); + if (!qms) + return -ENOMEM; + + qms->gh = cctkGH; + + ret = qms_solver_init(&qms->solver, cctkGH, basis_order_r, basis_order_z, + scale_factor, filter_power, 0.0); + if (ret < 0) + return ret; + + ret = posix_memalign((void**)&qms->coeffs_eval, 32, + basis_order_r * basis_order_z * sizeof(*qms->coeffs_eval)); + if (ret) + return -ENOMEM; + + for (int i = 0; i < ARRAY_ELEMS(qms->solution_cache); i++) { + ret = posix_memalign((void**)&qms->solution_cache[i].coeffs, 32, + basis_order_r * basis_order_z * sizeof(*qms->solution_cache[i].coeffs)); + if (ret) + return -ENOMEM; + } + + qms_context = qms; + + return 0; +} + void qms_init(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; + if (!qms_context) + context_init(cctkGH); + double *Kdot11 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot11"); double *Kdot22 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot22"); double *Kdot33 = CCTK_VarDataPtr(cctkGH, 0, "ML_BSSN::Kdot33"); |