#include #include #include "cctk.h" #define POLAR (1) #define CCZ4 (0) #define SQR(x) ((x) * (x)) #define SGN(x) ((x) >= 0.0 ? 1.0 : -1.0) #define MAX(x, y) ((x) > (y) ? (x) : (y)) #define MIN(x, y) ((x) > (y) ? (y) : (x)) #define ARRAY_ELEMS(arr) (sizeof(arr) / sizeof(*arr)) /* * small number to avoid r=0 singularities */ #define EPS 1E-08 #define SCALE_FACTOR scale_factor /* indices (in our code, not cactus structs) of the grid functions which we'll need to * interpolate on the pseudospectral grid */ enum MetricVars { GTXX = 0, GTYY, GTZZ, GTXY, GTXZ, GTYZ, PHI, ATXX, ATYY, ATZZ, ATXY, ATXZ, ATYZ, K, XTX, XTY, XTZ, BETAX, BETAY, BETAZ, ALPHA, KDOT_XX, KDOT_YY, KDOT_ZZ, KDOT_XY, KDOT_XZ, KDOT_YZ, XTDOT_X, XTDOT_Y, XTDOT_Z, PHIDOT, NB_METRIC_VARS, }; /* indices of the interpolated values of the above grid functions and their derivatives */ enum InterpMetricVars { I_GTXX = 0, I_GTYY, I_GTZZ, I_GTXY, I_GTXZ, I_GTYZ, I_PHI, I_PHI_DX, I_PHI_DY, I_PHI_DZ, I_ATXX, I_ATYY, I_ATZZ, I_ATXY, I_ATXZ, I_ATYZ, I_K, I_K_DX, I_K_DY, I_K_DZ, I_XTX, I_XTY, I_XTZ, I_BETAX, I_BETAY, I_BETAZ, I_ALPHA, I_ALPHA_DX, I_ALPHA_DY, I_ALPHA_DZ, I_ALPHA_DXX, I_ALPHA_DYY, I_ALPHA_DZZ, I_ALPHA_DXY, I_ALPHA_DXZ, I_ALPHA_DYZ, I_KDOT_XX, I_KDOT_YY, I_KDOT_ZZ, I_KDOT_XY, I_KDOT_XZ, I_KDOT_YZ, I_XTDOT_X, I_XTDOT_Y, I_XTDOT_Z, I_PHIDOT, I_PHIDOT_DX, I_PHIDOT_DY, I_PHIDOT_DZ, NB_INTERP_VARS, }; /* a set of basis functions */ typedef struct BasisSet { /* evaluate the idx-th basis function at the specified point*/ double (*eval) (double coord, int idx); /* evaluate the first derivative of the idx-th basis function at the specified point*/ double (*eval_diff1)(double coord, int idx); /* evaluate the second derivative of the idx-th basis function at the specified point*/ double (*eval_diff2)(double coord, int idx); /** * Get the idx-th collocation point for the specified order. * idx runs from 0 to order - 1 (inclusive) */ double (*colloc_point)(int order, int idx); } BasisSet; extern const BasisSet qms_cheb_basis; extern const BasisSet qms_cheb_even_basis; extern const BasisSet qms_full_basis; extern const BasisSet qms_tb_even_basis; extern const BasisSet qms_sb_even_basis; extern const BasisSet qms_tl_basis; extern const BasisSet qms_cos_even_basis; extern double scale_factor; typedef struct MGContext MGContext; typedef struct SORContext SORContext; /* precomputed values for a given refined grid */ typedef struct CoordPatch { CCTK_REAL origin[3]; CCTK_INT delta[3]; CCTK_INT size[3]; // basis values on the grid double *basis_val_r; double *basis_val_z; double *transform_z; double *transform_matrix; double *transform_matrix1; double *transform_tmp; double *one; int y_idx; MGContext *mg; SORContext *sor; } CoordPatch; /* state and scratch storage for the BiCGSTAB solver */ typedef struct BiCGSTABContext { double *p, *v, *y, *z, *t; double *res, *res0; double *k; cl_mem cl_p, cl_v, cl_y, cl_z, cl_t; cl_mem cl_res, cl_res0; cl_mem cl_k, cl_mat; cl_mem cl_rho, cl_alpha, cl_beta, cl_omega, cl_omega1; cl_mem cl_tmp, cl_tmp1; } BiCGSTABContext; typedef struct MaximalSlicingContext { cGH *gh; const BasisSet *basis; const BasisSet *basis1; struct { double time; double *coeffs; } solution_cache[8]; int nb_solutions; double *coeffs_eval; BiCGSTABContext bicgstab; int steps_since_inverse; uint64_t solve_count; uint64_t solve_time; uint64_t lu_solves_count; uint64_t lu_solves_time; uint64_t cg_solve_count; uint64_t cg_iter_count; uint64_t cg_time_total; uint64_t interp_geometry_count; uint64_t interp_geometry_time; uint64_t calc_eq_coeffs_count; uint64_t calc_eq_coeffs_time; uint64_t construct_matrix_count; uint64_t construct_matrix_time; uint64_t grid_expand_count; uint64_t grid_expand_time; // the grid of collocation points double *colloc_grid[2]; // interpolation parameters int coord_system; int interp_operator; int interp_params; CCTK_REAL *interp_coords[3]; int interp_vars_indices[NB_METRIC_VARS]; CCTK_REAL *interp_values[NB_INTERP_VARS]; CCTK_REAL *interp_values_prefilter[NB_INTERP_VARS]; CCTK_INT interp_value_codes[NB_INTERP_VARS]; CCTK_REAL *metric_u[6]; CCTK_REAL *kij_kij; CCTK_REAL *trk; int nb_coeffs_x; int nb_coeffs_z; int nb_coeffs; int nb_colloc_points_x; int nb_colloc_points_z; int nb_colloc_points; int colloc_grid_order_x; int colloc_grid_order_z; double *mat; double *mat_f; double *rhs; double *coeffs; int *ipiv; double *basis_x_val; double *basis_x_dval; double *basis_x_d2val; double *basis_z_val; double *basis_z_dval; double *basis_z_d2val; double *basis_val_00; double *basis_val_20; double *basis_val_02; double *basis_val_11; double *basis_val_10; double *basis_val_01; double *eq_coeff_00; double *eq_coeff_20; double *eq_coeff_02; double *eq_coeff_11; double *eq_coeff_10; double *eq_coeff_01; double *coeff_scale; double *input_filter; CoordPatch *patches; int nb_patches; // OpenCL / CLBLAS stuff cl_context cl_ctx; cl_command_queue cl_queue; cl_device_id ocl_device; cl_mem ocl_coeffs; } MaximalSlicingContext; int qms_maximal_solve(MaximalSlicingContext *ms); #include static inline int64_t gettime(void) { struct timeval tv; gettimeofday(&tv, NULL); return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; }