From 383d610b1aa6864f6b97b950237e89a204b58e48 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 23 Aug 2018 13:11:43 +0200 Subject: Factor out lapse evaluation. --- src/maximal_slicing_axi.c | 135 ++++++++++++++-------------------------------- 1 file changed, 41 insertions(+), 94 deletions(-) diff --git a/src/maximal_slicing_axi.c b/src/maximal_slicing_axi.c index 4f4c72e..5e7f07d 100644 --- a/src/maximal_slicing_axi.c +++ b/src/maximal_slicing_axi.c @@ -84,13 +84,16 @@ static void coord_patch_free(CoordPatch *cp) free(cp->one); } -static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, int level, - CCTK_REAL *x, CCTK_REAL *y, CCTK_REAL *z) +static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, int level) { cGH *gh = ms->gh; int nb_coeffs_x = ms->solver->nb_coeffs[0]; int nb_coeffs_z = ms->solver->nb_coeffs[1]; + const double *a_x = CCTK_VarDataPtr(gh, 0, "grid::x"); + const double *a_y = CCTK_VarDataPtr(gh, 0, "grid::y"); + const double *a_z = CCTK_VarDataPtr(gh, 0, "grid::z"); + CoordPatch *cp; int64_t grid_size; int i; @@ -113,7 +116,7 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, int level, cp->level = level; for (i = 0; i < gh->cctk_lsh[1]; i++) - if (fabs(y[CCTK_GFINDEX3D(gh, 0, i, 0)]) < 1e-8) { + if (fabs(a_y[CCTK_GFINDEX3D(gh, 0, i, 0)]) < 1e-8) { cp->y_idx = i; break; } @@ -149,12 +152,12 @@ static CoordPatch *get_coord_patch(MaximalSlicingContext *ms, int level, posix_memalign((void**)&cp->transform_matrix1, 32, sizeof(*cp->transform_matrix1) * nb_coeffs_z * gh->cctk_lsh[0] * gh->cctk_lsh[2]); #pragma omp parallel for for (int j = 0; j < gh->cctk_lsh[2]; j++) { - CCTK_REAL zz = z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)]; + CCTK_REAL zz = a_z[CCTK_GFINDEX3D(ms->gh, 0, 0, j)]; for (int i = 0; i < gh->cctk_lsh[0]; i++) { const int idx_grid = j * gh->cctk_lsh[0] + i; - double xx = x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; + double xx = a_x[CCTK_GFINDEX3D(ms->gh, i, 0, 0)]; double rr = sqrt(SQR(xx) + SQR(zz)); #if MS_POLAR @@ -234,26 +237,11 @@ static void context_free(MaximalSlicingContext **pms) *pms = NULL; } -void maximal_slicing_axi_initlapse(CCTK_ARGUMENTS) +void msa_lapse_eval(MaximalSlicingContext *ms, double *dst) { - MaximalSlicingContext *ms; - - CoordPatch *cp; - - DECLARE_CCTK_ARGUMENTS; - DECLARE_CCTK_PARAMETERS; - - const int reflevel = ctz(cctkGH->cctk_levfac[0]); - double *coeffs = NULL; - int i, ret; - - context_init(cctkGH, &ms); - - cp = get_coord_patch(ms, reflevel, x, y, z); - - ms_solver_solve(ms->solver); - - coeffs = ms->solver->coeffs; + const int reflevel = ctz(ms->gh->cctk_levfac[0]); + CoordPatch *cp = get_coord_patch(ms, reflevel); + double *coeffs = ms->solver->coeffs; #if 0 #pragma omp parallel for @@ -281,19 +269,18 @@ void maximal_slicing_axi_initlapse(CCTK_ARGUMENTS) } #else cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], - 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2], - coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]); + ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], + 1.0, cp->transform_matrix, ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2], + coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, + ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2]); #pragma omp parallel for - for (int j = 0; j < cctk_lsh[2]; j++) - for (int i = 0; i < cctk_lsh[0]; i++) { - const int idx_grid = j * cctk_lsh[0] + i; + for (int j = 0; j < ms->gh->cctk_lsh[2]; j++) + for (int i = 0; i < ms->gh->cctk_lsh[0]; i++) { + const int idx_grid = j * ms->gh->cctk_lsh[0] + i; const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1, - cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]); - alpha[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = 1.0 + val; + cp->transform_tmp + idx_grid, ms->gh->cctk_lsh[0] * ms->gh->cctk_lsh[2]); + dst[CCTK_GFINDEX3D(ms->gh, i, cp->y_idx, j)] = 1.0 + val; } - - memcpy(alp, alpha, sizeof(*alp) * cctk_lsh[0] * cctk_lsh[2]); #endif //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); @@ -305,6 +292,24 @@ void maximal_slicing_axi_initlapse(CCTK_ARGUMENTS) // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0, // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x, // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]); +} + +void maximal_slicing_axi_initlapse(CCTK_ARGUMENTS) +{ + MaximalSlicingContext *ms; + + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + int i, ret; + + context_init(cctkGH, &ms); + + ms_solver_solve(ms->solver); + + msa_lapse_eval(ms, alpha); + + memcpy(alp, alpha, sizeof(*alp) * cctk_lsh[0] * cctk_lsh[2]); context_free(&ms); } @@ -313,15 +318,11 @@ void maximal_slicing_axi(CCTK_ARGUMENTS) { static MaximalSlicingContext *ms; - CoordPatch *cp; - DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; - const int reflevel = ctz(cctkGH->cctk_levfac[0]); int64_t expand_start, totaltime_start; - double *coeffs = NULL; int i, ret; totaltime_start = gettime(); @@ -330,77 +331,23 @@ void maximal_slicing_axi(CCTK_ARGUMENTS) if (!ms) context_init(cctkGH, &ms); - cp = get_coord_patch(ms, reflevel, x, y, z); - CCTK_TimerStart("MaximalSlicingAxi_Solve"); ms_solver_solve(ms->solver); CCTK_TimerStop("MaximalSlicingAxi_Solve"); - coeffs = ms->solver->coeffs; - if (export_coeffs) - memcpy(alpha_coeffs, coeffs, sizeof(*alpha_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]); + memcpy(alpha_coeffs, ms->solver->coeffs, sizeof(*alpha_coeffs) * ms->solver->nb_coeffs[0] * ms->solver->nb_coeffs[1]); CCTK_TimerStart("MaximalSlicingAxi_Expand"); expand_start = gettime(); -#if 0 -#pragma omp parallel for - for (int k = 0; k < cctk_lsh[2]; k++) { - for (int i = 0; i < cctk_lsh[0]; i++) { - int idx = CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, k); - double xx = x[idx]; - double zz = z[idx]; - double r = sqrt(SQR(xx) + SQR(zz)); - double phi = atan2(zz, xx); - double val = 1.0; - - for (int l = 0; l < ms->nb_coeffs_z; l++) { - double tmp = 0.0; - for (int m = 0; m < ms->nb_coeffs_x; m++) { - const int idx_coeff = l * ms->nb_coeffs_x + m; - tmp += ms->coeffs[idx_coeff] * ms->basis->eval(r, m); - } - val += tmp * ms->basis1->eval(phi, l); - } - - alp[idx] = val; - } - } -#else - cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - cctk_lsh[0] * cctk_lsh[2], ms->solver->nb_coeffs[1], ms->solver->nb_coeffs[0], - 1.0, cp->transform_matrix, cctk_lsh[0] * cctk_lsh[2], - coeffs, ms->solver->nb_coeffs[0], 0.0, cp->transform_tmp, cctk_lsh[0] * cctk_lsh[2]); -#pragma omp parallel for - for (int j = 0; j < cctk_lsh[2]; j++) - for (int i = 0; i < cctk_lsh[0]; i++) { - const int idx_grid = j * cctk_lsh[0] + i; - const double val = cblas_ddot(ms->solver->nb_coeffs[1], cp->transform_matrix1 + idx_grid * ms->solver->nb_coeffs[1], 1, - cp->transform_tmp + idx_grid, cctk_lsh[0] * cctk_lsh[2]); - alpha[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = 1.0 + val; - } -#endif - //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); - //memset(alp, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp)); - //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - // ms->nb_coeffs_x, cctk_lsh[2], ms->nb_coeffs_z, 1.0, - // coeffs, ms->nb_coeffs_x, cp->basis_val_z, ms->nb_coeffs_z, - // 0.0, cp->transform_z, ms->nb_coeffs_x); - //cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, - // cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->nb_coeffs_x, 1.0, - // cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->nb_coeffs_x, - // 1.0, alp, cctk_lsh[0] * cctk_lsh[1]); + msa_lapse_eval(ms, alpha); ms->grid_expand_time += gettime() - expand_start; ms->grid_expand_count++; CCTK_TimerStop("MaximalSlicingAxi_Expand"); - //CCTK_TimerStart("MaximalSlicingAxi_Polish"); - //msa_sor(ms, cp); - //CCTK_TimerStop("MaximalSlicingAxi_Polish"); - /* print stats */ if (!(ms->grid_expand_count & 255)) { fprintf(stderr, "Maximal slicing stats:\n"); -- cgit v1.2.3