From b9d2d61d033200f1168b43d3a84772a1739d95f3 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 29 Nov 2018 17:58:17 +0100 Subject: Add timing benchmarks. --- src/maximal_slicing_axi_mg.c | 182 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 169 insertions(+), 13 deletions(-) diff --git a/src/maximal_slicing_axi_mg.c b/src/maximal_slicing_axi_mg.c index 0838d6a..b5255cc 100644 --- a/src/maximal_slicing_axi_mg.c +++ b/src/maximal_slicing_axi_mg.c @@ -23,6 +23,20 @@ #define CPINDEX(cp, i, j, k) ((k * cp->grid_size[1] + j) * cp->grid_size[0] + i) +#if 0 +#define LOGDEBUG(...) fprintf(stderr, __VA_ARGS) +#else +#define LOGDEBUG +#endif + +#include +static inline int64_t gettime(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; +} + typedef struct MaximalSlicingContext MaximalSlicingContext; /* precomputed values for a given refined grid */ @@ -57,6 +71,37 @@ typedef struct MSMGContext { int nb_patches; MaximalSlicingContext *ps_solver; + + /* timings */ + int64_t time_solve; + int64_t count_solve; + + int64_t time_solve_ps; + int64_t count_solve_ps; + + int64_t time_solve_mg; + int64_t count_solve_mg; + int64_t time_solve_fill_coeffs; + int64_t time_solve_boundaries; + int64_t time_solve_mg2d; + int64_t time_solve_export; + int64_t time_solve_history; + + int64_t time_eval; + int64_t count_eval; + + int64_t time_eval_ps; + int64_t count_eval_ps; + + int64_t time_extrapolate; + int64_t count_extrapolate; + + int64_t time_fine_solve; + int64_t count_fine_solve; + int64_t time_fine_fill_coeffs; + int64_t time_fine_boundaries; + int64_t time_fine_mg2d; + int64_t time_fine_export; } MSMGContext; int msa_context_init(cGH *gh, MaximalSlicingContext **ctx, int basis_order0, int basis_order1, double scale_factor, double filter_power); @@ -183,6 +228,54 @@ static CoordPatch *get_coord_patch(MSMGContext *ms, int level) return cp; } +static void print_stats(MSMGContext *ms) +{ + int64_t total = ms->time_solve + ms->time_eval; + + fprintf(stderr, "%2.2f%% eval: %ld runs; %g s total; %g ms avg per run\n", + (double)ms->time_eval * 100.0 / total, ms->count_eval, + ms->time_eval / 1e6, ms->time_eval / 1e3 / ms->count_eval); + fprintf(stderr, " %2.2f%% eval ps: %ld runs; %g s total; %g ms avg per run\n", + (double)ms->time_eval_ps * 100.0 / total, ms->count_eval_ps, + ms->time_eval_ps / 1e6, ms->time_eval_ps / 1e3 / ms->count_eval_ps); + fprintf(stderr, " %2.2f%% eval extrapolate: %ld runs; %g s total; %g ms avg per run\n", + (double)ms->time_extrapolate * 100.0 / total, ms->count_extrapolate, + ms->time_extrapolate / 1e6, ms->time_extrapolate / 1e3 / ms->count_extrapolate); + fprintf(stderr, " %2.2f%% fine solve: %ld runs; %g s total; %g ms avg per run || " + "%2.2f%% fill coeffs %2.2f%% boundaries %2.2f%% mg2d %2.2f%% export\n", + (double)ms->time_fine_solve * 100.0 / total, ms->count_fine_solve, + ms->time_fine_solve / 1e6, ms->time_fine_solve / 1e3 / ms->count_fine_solve, + (double)ms->time_fine_fill_coeffs * 100.0 / ms->time_fine_solve, + (double)ms->time_fine_boundaries * 100.0 / ms->time_fine_solve, + (double)ms->time_fine_mg2d * 100.0 / ms->time_fine_solve, + (double)ms->time_fine_export * 100.0 / ms->time_fine_solve); + + fprintf(stderr, "%2.2f%% solve: %ld runs; %g s total; %g ms avg per run\n", + (double)ms->time_solve * 100.0 / total, ms->count_solve, + ms->time_solve / 1e6, ms->time_solve / 1e3 / ms->count_solve); + fprintf(stderr, " %2.2f%% solve ps: %ld runs; %g s total; %g ms avg per run\n", + (double)ms->time_solve_ps * 100.0 / total, ms->count_solve_ps, + ms->time_solve_ps / 1e6, ms->time_solve_ps / 1e3 / ms->count_solve_ps); + fprintf(stderr, " %2.2f%% solve mg2d: %ld runs; %g s total; %g ms avg per run || " + "%2.2f%% fill coeffs %2.2f%% boundaries %2.2f%% mg2d %2.2f%% export %2.2f%% history\n", + (double)ms->time_solve_mg * 100.0 / total, ms->count_solve_mg, + ms->time_solve_mg / 1e6, ms->time_solve_mg / 1e3 / ms->count_solve_mg, + (double)ms->time_solve_fill_coeffs * 100.0 / ms->time_solve_mg, + (double)ms->time_solve_boundaries * 100.0 / ms->time_solve_mg, + (double)ms->time_solve_mg2d * 100.0 / ms->time_solve_mg, + (double)ms->time_solve_export * 100.0 / ms->time_solve_mg, + (double)ms->time_solve_history * 100.0 / ms->time_solve_mg); + + for (int i = 0; i < ms->nb_patches; i++) { + CoordPatch *cp = get_coord_patch(ms, i); + char indent_buf[64]; + + snprintf(indent_buf, sizeof(indent_buf), " [%d] ", i); + + mg2d_print_stats(cp->solver, indent_buf); + } +} + static int context_init(cGH *gh, MSMGContext **ctx) { MSMGContext *ms; @@ -370,6 +463,8 @@ void msa_mg_eval(CCTK_ARGUMENTS) DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; + int64_t total_start; + int nb_levels; int nb_levels_type, ret; @@ -378,15 +473,23 @@ void msa_mg_eval(CCTK_ARGUMENTS) const int reflevel = ctz(cctkGH->cctk_levfac[0]); double time_interp_step, fact0, fact1; - fprintf(stderr, "evaluating lapse at rl=%d, t=%g\n", reflevel, t); + total_start = gettime(); + + LOGDEBUG( "evaluating lapse at rl=%d, t=%g\n", reflevel, t); nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type); if (!history_initialized(lapse_prev0_time, lapse_prev1_time, nb_levels)) { + int64_t eval_ps_start = gettime(); + solve_ps(ms, lapse_mg_eval); memcpy(alpha, lapse_mg_eval, sizeof(*alpha) * grid_size); - fprintf(stderr, "pseudospectral solve\n"); - return; + + ms->time_eval_ps += gettime() - eval_ps_start; + ms->count_eval_ps++; + + LOGDEBUG( "pseudospectral solve\n"); + goto finish; } if (lapse_prev1_time[reflevel] <= lapse_prev0_time[reflevel] || @@ -399,19 +502,29 @@ void msa_mg_eval(CCTK_ARGUMENTS) if (reflevel < nb_levels - 1) { /* on coarse levels use extrapolated past solutions */ - fprintf(stderr, "extrapolating from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); + int64_t extrap_start = gettime(); + + LOGDEBUG( "extrapolating from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); for (size_t i = 0; i < grid_size; i++) lapse_mg_eval[i] = fact0 * lapse_prev0[i] + fact1 * lapse_prev1[i]; + + ms->time_extrapolate += gettime() - extrap_start; + ms->count_extrapolate++; } else { /* on the finest level, use the extrapolation only for the boundary * values and solve for the interior */ + int64_t fine_solve_start = gettime(); + int64_t start; CoordPatch *cp = get_coord_patch(ms, reflevel); + start = gettime(); fill_eq_coeffs(ms->gh, cp); + ms->time_fine_fill_coeffs += gettime() - start; - fprintf(stderr, "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); + LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); + start = gettime(); for (ptrdiff_t idx_z = 0; idx_z < cp->offset_right[1]; idx_z++) for (ptrdiff_t idx_x = 0; idx_x < cp->boundary_val_stride[0]; idx_x++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, idx_x + cp->offset_left[0], cp->y_idx, cctkGH->cctk_lsh[2] - cp->offset_right[1] + idx_z); @@ -432,16 +545,28 @@ void msa_mg_eval(CCTK_ARGUMENTS) const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cctkGH->cctk_lsh[0] - cp->offset_right[0], cp->y_idx, cp->offset_left[1] + i); cp->solver->boundaries[MG2D_BOUNDARY_1U]->val[i] = lapse_mg_eval[idx] - 1.0; } + ms->time_fine_boundaries += gettime() - start; - fprintf(stderr, "mg solve\n"); + LOGDEBUG( "mg solve\n"); + start = gettime(); ret = mg2d_solve(cp->solver); if (ret < 0) CCTK_WARN(0, "Error solving the maximal slicing equation"); + ms->time_fine_mg2d += gettime() - start; + start = gettime(); solution_to_grid(cp, lapse_mg_eval); + ms->time_fine_export = gettime() - start; + + ms->time_fine_solve += gettime() - fine_solve_start; + ms->count_fine_solve++; } memcpy(alpha, lapse_mg_eval, sizeof(*alpha) * grid_size); + +finish: + ms->time_eval += gettime() - total_start; + ms->count_eval++; } void msa_mg_solve(CCTK_ARGUMENTS) @@ -451,6 +576,8 @@ void msa_mg_solve(CCTK_ARGUMENTS) CoordPatch *cp; + int64_t total_start, mg_start, start; + int nb_levels, reflevel_top, ts_tmp; int nb_levels_type, ret; @@ -461,7 +588,9 @@ void msa_mg_solve(CCTK_ARGUMENTS) double *lapse_mg1; - fprintf(stderr, "solve lapse at rl=%d, t=%g; step %d\n", reflevel, t, timestep); + total_start = gettime(); + + LOGDEBUG( "solve lapse at rl=%d, t=%g; step %d\n", reflevel, t, timestep); nb_levels = *(int*)CCTK_ParameterGet("num_levels_1", "CarpetRegrid2", &nb_levels_type); @@ -474,27 +603,36 @@ void msa_mg_solve(CCTK_ARGUMENTS) if (!history_initialized(lapse_prev0_time, lapse_prev1_time, nb_levels)) { if (reflevel == 0 || !(timestep % 2)) { + int64_t solve_ps_start = gettime(); + if (lapse_prev1_time[reflevel] != DBL_MAX && reflevel_top == 0) { - fprintf(stderr, "copy %g\n", lapse_prev1_time[reflevel]); + LOGDEBUG( "copy %g\n", lapse_prev1_time[reflevel]); memcpy(lapse_prev0, lapse_prev1, grid_size * sizeof(*lapse_prev1)); lapse_prev0_time[reflevel] = lapse_prev1_time[reflevel]; } - fprintf(stderr, "solve ps\n"); + LOGDEBUG( "solve ps\n"); solve_ps(ms, alpha); memcpy(lapse_prev1, alpha, grid_size * sizeof(*lapse_prev0)); lapse_prev1_time[reflevel] = t; + + ms->time_solve_ps += gettime() - solve_ps_start; + ms->count_solve_ps++; } - return; + goto finish; } - fprintf(stderr, "mg solve cur %d top %d\n", reflevel, reflevel_top); + mg_start = gettime(); + LOGDEBUG( "mg solve cur %d top %d\n", reflevel, reflevel_top); /* fill in the equation coefficients */ cp = get_coord_patch(ms, reflevel); + start = gettime(); fill_eq_coeffs(cctkGH, cp); + ms->time_solve_fill_coeffs += gettime() - start; + start = gettime(); if (reflevel == 0 || timestep % 2) { /* outer-most level for this solve, use extrapolated lapse as the * boundary condition */ @@ -502,7 +640,7 @@ void msa_mg_solve(CCTK_ARGUMENTS) double fact0 = (lapse_prev1_time[reflevel] - t) / time_interp_step; double fact1 = (t - lapse_prev0_time[reflevel]) / time_interp_step; - fprintf(stderr, "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); + LOGDEBUG( "extrapolating BCs from t1=%g (%g) and t0=%g (%g)\n", lapse_prev1_time[reflevel], fact1, lapse_prev0_time[reflevel], fact0); for (size_t i = 0; i < cp->solver->boundaries[MG2D_BOUNDARY_0U]->val_len; i++) { const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, i + cp->offset_left[0], cp->y_idx, cctkGH->cctk_lsh[2] - cp->offset_right[1]); @@ -525,22 +663,28 @@ void msa_mg_solve(CCTK_ARGUMENTS) const ptrdiff_t idx = CCTK_GFINDEX3D(cctkGH, cctkGH->cctk_lsh[0] - cp->offset_right[0], cp->y_idx, cp->offset_left[1] + i); cp->solver->boundaries[MG2D_BOUNDARY_1U]->val[i] = lapse_mg[idx] - 1.0; } + ms->time_solve_boundaries += gettime() - start; /* do the elliptic solve */ + start = gettime(); ret = mg2d_solve(cp->solver); if (ret < 0) CCTK_WARN(0, "Error solving the maximal slicing equation"); + ms->time_solve_mg2d += gettime() - start; /* export the result */ + start = gettime(); solution_to_grid(cp, lapse_mg); lapse_mg1 = CCTK_VarDataPtr(cctkGH, 1, "MaximalSlicingAxiMG::lapse_mg"); memcpy(lapse_mg1, lapse_mg, grid_size * sizeof(*lapse_mg1)); + ms->time_solve_export += gettime() - start; /* update lapse history for extrapolation */ if (reflevel == 0 || !(timestep % 2)) { const double vel_fact = 1.0 / (1 << (reflevel - reflevel_top)); + start = gettime(); for (size_t j = 0; j < grid_size; j++) { const double sol_new = lapse_mg[j]; const double delta = sol_new - lapse_mg_eval[j]; @@ -550,7 +694,19 @@ void msa_mg_solve(CCTK_ARGUMENTS) } lapse_prev0_time[reflevel] = lapse_prev1_time[reflevel]; lapse_prev1_time[reflevel] = cctkGH->cctk_time; + + ms->time_solve_history += gettime() - start; } + + ms->time_solve_mg += gettime() - mg_start; + ms->count_solve_mg++; + +finish: + ms->time_solve += gettime() - total_start; + ms->count_solve++; + + if (reflevel == 0 && ms->count_solve > 0 && ms->count_eval > 0) + print_stats(ms); } void msa_mg_sync(CCTK_ARGUMENTS) @@ -558,7 +714,7 @@ void msa_mg_sync(CCTK_ARGUMENTS) DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; const int reflevel = ctz(cctkGH->cctk_levfac[0]); - fprintf(stderr, "\nsync %g %d\n\n", cctkGH->cctk_time, reflevel); + LOGDEBUG( "\nsync %g %d\n\n", cctkGH->cctk_time, reflevel); return; } -- cgit v1.2.3