From 83f009f5ee35f7ebce3059155859770accc53902 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 6 May 2019 18:21:20 +0200 Subject: tmp --- ell_grid_solve.c | 9 ++-- meson.build | 2 + mg2d.c | 146 ++++++++++++++++++++++++++++++++++++++++++------------- mg2d_test.c | 17 ++++--- 4 files changed, 131 insertions(+), 43 deletions(-) diff --git a/ell_grid_solve.c b/ell_grid_solve.c index f8512f6..978c26f 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -171,11 +171,12 @@ static void boundaries_apply_reflect(double *dst, const ptrdiff_t dst_stride[2], for (int j = 1; j <= FD_STENCIL_MAX; j++) memcpy(dst + j * dst_stride[1], dst - j * dst_stride[1], sizeof(*dst) * boundary_size); } else { +//#pragma omp parallel for for (size_t i = 0; i < boundary_size; i++) { for (int j = 1; j <= FD_STENCIL_MAX; j++) - dst[dst_stride[1] * j] = dst[-dst_stride[1] * j]; + dst[dst_stride[0] * i + dst_stride[1] * j] = dst[dst_stride[0] * i -dst_stride[1] * j]; - dst += dst_stride[0]; + //dst += dst_stride[0]; } } } @@ -304,7 +305,7 @@ static void line_madd_c(ptrdiff_t linesize, double *dst, const double *src, doub dst[i] += c * src[i]; } -static int residual_add_task(void *arg, unsigned int job_idx, unsigned int thread_idx) +static inline int residual_add_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { #if 1 EGSContext *ctx = arg; @@ -313,6 +314,8 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa priv->r.line_add(ctx->domain_size[0], ctx->u->data + ctx->u->stride[0] * job_idx, ctx->residual->data + ctx->residual->stride[0] * job_idx, priv->r.relax_factor); #else + EGSContext *ctx = arg; + EGSInternal *priv = ctx->priv; for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0; diff --git a/meson.build b/meson.build index 0c55f2e..e070883 100644 --- a/meson.build +++ b/meson.build @@ -26,7 +26,9 @@ libcblas = cc.find_library('blas') liblapacke = cc.find_library('lapacke') dep_tp = declare_dependency(link_args : '-lthreadpool') +dep_omp = dependency('openmp') +#deps = [dep_tp, libm, libcblas, liblapacke, dep_omp] deps = [dep_tp, libm, libcblas, liblapacke] cdata = configuration_data() diff --git a/mg2d.c b/mg2d.c index 7cdf7bc..ef7182d 100644 --- a/mg2d.c +++ b/mg2d.c @@ -51,6 +51,8 @@ typedef struct MG2DLevel { struct MG2DLevel *child; + double correct_fact; + /* timings */ int64_t count_cycles; Timer timer_solve; @@ -104,7 +106,7 @@ static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thr for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) { const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0; const ptrdiff_t idx_src = job_idx * level->prolong_tmp->stride[0] + idx0; - level->solver->u->data[idx_dst] -= level->prolong_tmp->data[idx_src]; + level->solver->u->data[idx_dst] -= level->correct_fact * level->prolong_tmp->data[idx_src]; } return 0; } @@ -129,6 +131,51 @@ static int mg_solve(MG2DContext *ctx, MG2DLevel *level, enum EGSType solve_type, return 0; } +static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact); + +static int mg_handle_child(MG2DContext *ctx, MG2DLevel *src, int allow_exact) +{ + MG2DLevel *dst = src->child; + int ret, ret_subgrid; + + /* restrict the residual as to the coarser-level rhs */ + mg2di_timer_start(&src->timer_restrict); + ret = mg2di_gt_transfer(src->transfer_restrict, dst->solver->rhs, + src->solver->residual); + mg2di_timer_stop(&src->timer_restrict); + if (ret < 0) + return ret; + + /* solve on the coarser level */ + ret_subgrid = mg_solve_subgrid(ctx, dst, allow_exact); + if (ret_subgrid < 0) + return ret_subgrid; + + /* prolongate the coarser-level correction */ + mg2di_timer_start(&src->timer_prolong); + ret = mg2di_gt_transfer(src->transfer_prolong, src->prolong_tmp, + dst->solver->u); + mg2di_timer_stop(&src->timer_prolong); + if (ret < 0) + return ret; + + /* apply the correction */ + mg2di_timer_start(&src->timer_correct); + tp_execute(ctx->priv->tp, src->solver->domain_size[1], coarse_correct_task, src); + mg2di_timer_stop(&src->timer_correct); + + /* re-init the current-level solver (re-calc the residual) */ + mg2di_timer_start(&src->timer_reinit); + ret = mg2di_egs_init(src->solver, src->egs_init_flags); + mg2di_timer_stop(&src->timer_reinit); + if (ret < 0) + return ret; + + src->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS; + + return ret_subgrid; +} + static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact) { enum EGSType solve_type = allow_exact && level->solver->domain_size[0] <= ctx->max_exact_size ? @@ -161,11 +208,13 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact) return ret; level->count_cycles++; + ret = 1; goto finish; } for (int i = 0; i < ctx->nb_cycles; i++) { double res_prev; + int nb_relax_post = ctx->nb_relax_post; /* pre-restrict relaxation */ for (int j = 0; j < ctx->nb_relax_pre; j++) { @@ -174,52 +223,78 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact) "pre-relax", j == ctx->nb_relax_pre - 1 && level->child); if (ret < 0) return ret; - if (res_prev / level->solver->residual_max > 1.1) - j--; + //if (res_prev / level->solver->residual_max > 1.1) + // j--; } if (level->child) { - /* restrict the residual as to the coarser-level rhs */ - mg2di_timer_start(&level->timer_restrict); - ret = mg2di_gt_transfer(level->transfer_restrict, level->child->solver->rhs, - level->solver->residual); - mg2di_timer_stop(&level->timer_restrict); - if (ret < 0) - return ret; + res_prev = level->solver->residual_max; - /* solve on the coarser level */ - ret = mg_solve_subgrid(ctx, level->child, 1); + ret = mg_handle_child(ctx, level, 1); if (ret < 0) return ret; - /* prolongate the coarser-level correction */ - mg2di_timer_start(&level->timer_prolong); - ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp, - level->child->solver->u); - mg2di_timer_stop(&level->timer_prolong); - if (ret < 0) - return ret; + if (ret == 1 && res_prev / level->solver->residual_max < 0.2 && level->solver->residual_max > ctx->tol) { + mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE, + "Discarding correct from exact solve %g -> %g (%g)\n", + res_prev, level->solver->residual_max, + res_prev / level->solver->residual_max); + + level->correct_fact = -1.0; + /* unapply the correction */ + mg2di_timer_start(&level->timer_correct); + tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level); + mg2di_timer_stop(&level->timer_correct); + + level->correct_fact = 1.0; + + /* re-init the current-level solver (re-calc the residual) */ + mg2di_timer_start(&level->timer_reinit); + ret = mg2di_egs_init(level->solver, level->egs_init_flags); + mg2di_timer_stop(&level->timer_reinit); + if (ret < 0) + return ret; + + ret = mg_handle_child(ctx, level, 0); + if (ret < 0) + return ret; + } + +#if 0 + if (0 && res_prev / level->solver->residual_max < 0.6) { + mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE, + "Rejecting coarse correct %g -> %g (%g)\n", + res_prev, level->solver->residual_max, + res_prev / level->solver->residual_max); - /* apply the correction */ + level->correct_fact = -1.0; + /* unapply the correction */ mg2di_timer_start(&level->timer_correct); tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level); mg2di_timer_stop(&level->timer_correct); + level->correct_fact = 1.0; + /* re-init the current-level solver (re-calc the residual) */ - res_prev = level->solver->residual_max; mg2di_timer_start(&level->timer_reinit); ret = mg2di_egs_init(level->solver, level->egs_init_flags); mg2di_timer_stop(&level->timer_reinit); if (ret < 0) return ret; - - level->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS; + } + if (0 && res_prev / level->solver->residual_max < 0.9) { + nb_relax_post *= 4 * level->solver->residual_max / res_prev; + mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE, + "Extra post-relax: %d -> %d\n", + ctx->nb_relax_post, nb_relax_post); + } +#endif log_egs_step(ctx, level, "correct", res_prev, level->solver->residual_max); } /* post-correct relaxation */ - for (int j = 0; j < ctx->nb_relax_post; j++) { + for (int j = 0; j < nb_relax_post; j++) { ret = mg_solve(ctx, level, solve_type, "post-relax", 0); if (ret < 0) return ret; @@ -233,12 +308,12 @@ finish: if (!isfinite(res_new) || (res_new > 1e2 * DBL_EPSILON && res_old / res_new <= 1e-2)) { mg2di_log(&ctx->priv->logger, MG2D_LOG_ERROR, - "The relaxation step at level %d has diverged: %g -> %g\n", + "The step at level %d has diverged: %g -> %g\n", level->depth, res_old, res_new); return MG2D_ERR_DIVERGE; } - return 0; + return ret; } static void bnd_zero(MG2DBoundary *bdst, size_t nb_rows, size_t domain_size) @@ -378,10 +453,11 @@ static int mg_levels_init(MG2DContext *ctx) } } - cur->solver->logger = priv->logger; - cur->solver->cpuflags = priv->cpuflags; - cur->solver->tp = priv->tp; + cur->correct_fact = 1.0; + cur->solver->logger = priv->logger; + cur->solver->cpuflags = priv->cpuflags; + cur->solver->tp = priv->tp; cur->solver->fd_stencil = ctx->fd_stencil; cur->solver->relax->relax_factor = ctx->cfl_factor; @@ -410,7 +486,7 @@ static int mg_levels_init(MG2DContext *ctx) if (ctx->fd_stencil == 1) op_restrict = GRID_TRANSFER_FW_3; else - op_restrict = GRID_TRANSFER_FW_5; + op_restrict = GRID_TRANSFER_FW_1; } else op_restrict = op_interp; @@ -456,7 +532,7 @@ static int mg_levels_init(MG2DContext *ctx) if (ret < 0) return ret; - ret = restrict_diff_coeffs(ctx, op_interp, cur->child->solver, cur->solver); + ret = restrict_diff_coeffs(ctx, GRID_TRANSFER_LAGRANGE_1, cur->child->solver, cur->solver); if (ret < 0) return ret; } @@ -809,8 +885,9 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix) p = buf; ret = snprintf(p, sizeof(buf) - (p - buf), - "%2.2f%% level %d: %ld cycles %g s total time %g ms avg per call", - level_total * 100.0 / priv->timer_solve.time_nsec, level->depth, level->count_cycles, + "%2.2f%% level %d (%zup): %ld cycles %g s total time %g ms avg per call", + level_total * 100.0 / priv->timer_solve.time_nsec, level->depth, + level->solver->domain_size[0], level->count_cycles, level_total / 1e9, level_total / 1e6 / level->count_cycles); if (ret > 0) p += ret; @@ -833,9 +910,10 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix) } ret = snprintf(p, sizeof(buf) - (p - buf), - "||%2.2f%% init %2.2f%% residual %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)", + "||%2.2f%% init %2.2f%% residual (%g us avg) %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)", level->solver->timer_init.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_res_calc.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, + level->solver->timer_res_calc.time_nsec / 1e3 / level->solver->timer_res_calc.nb_runs, level->solver->timer_bnd.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_bnd_fixval.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_bnd_reflect.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, diff --git a/mg2d_test.c b/mg2d_test.c index 7f6654d..512f747 100644 --- a/mg2d_test.c +++ b/mg2d_test.c @@ -10,8 +10,8 @@ #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x)) -#define MAXITER (1 << 18) -#define TOL 1e-2 +#define MAXITER (1 << 15) +#define TOL 1e-9 #define DOMAIN_SIZE 1.0 @@ -93,6 +93,8 @@ int main(int argc, char **argv) long int gridsize; int ret = 0; + const char *omp_threads = getenv("OMP_NUM_THREADS"); + if (argc < 2) { fprintf(stderr, "Usage: %s \n", argv[0]); return 1; @@ -119,9 +121,13 @@ int main(int argc, char **argv) ctx->nb_cycles = 1; ctx->nb_relax_post = 2; ctx->tol = TOL; - ctx->nb_threads = 1; - ctx->log_level = MG2D_LOG_INFO; - ctx->max_levels = 1; + ctx->log_level = MG2D_LOG_VERBOSE; + //ctx->max_levels = 4; + ctx->max_exact_size = 33; + if (omp_threads) + ctx->nb_threads = strtol(omp_threads, NULL, 0); + if (ctx->nb_threads <= 0) + ctx->nb_threads = 1; for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) { MG2DBoundary *bnd = ctx->boundaries[bnd_loc]; @@ -173,7 +179,6 @@ int main(int argc, char **argv) goto fail; } - ctx->log_level = MG2D_LOG_VERBOSE; mg2d_print_stats(ctx, NULL); { -- cgit v1.2.3