summaryrefslogtreecommitdiff
path: root/mg2d.c
diff options
context:
space:
mode:
Diffstat (limited to 'mg2d.c')
-rw-r--r--mg2d.c146
1 files changed, 112 insertions, 34 deletions
diff --git a/mg2d.c b/mg2d.c
index 7cdf7bc..ef7182d 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -51,6 +51,8 @@ typedef struct MG2DLevel {
struct MG2DLevel *child;
+ double correct_fact;
+
/* timings */
int64_t count_cycles;
Timer timer_solve;
@@ -104,7 +106,7 @@ static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thr
for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) {
const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0;
const ptrdiff_t idx_src = job_idx * level->prolong_tmp->stride[0] + idx0;
- level->solver->u->data[idx_dst] -= level->prolong_tmp->data[idx_src];
+ level->solver->u->data[idx_dst] -= level->correct_fact * level->prolong_tmp->data[idx_src];
}
return 0;
}
@@ -129,6 +131,51 @@ static int mg_solve(MG2DContext *ctx, MG2DLevel *level, enum EGSType solve_type,
return 0;
}
+static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact);
+
+static int mg_handle_child(MG2DContext *ctx, MG2DLevel *src, int allow_exact)
+{
+ MG2DLevel *dst = src->child;
+ int ret, ret_subgrid;
+
+ /* restrict the residual as to the coarser-level rhs */
+ mg2di_timer_start(&src->timer_restrict);
+ ret = mg2di_gt_transfer(src->transfer_restrict, dst->solver->rhs,
+ src->solver->residual);
+ mg2di_timer_stop(&src->timer_restrict);
+ if (ret < 0)
+ return ret;
+
+ /* solve on the coarser level */
+ ret_subgrid = mg_solve_subgrid(ctx, dst, allow_exact);
+ if (ret_subgrid < 0)
+ return ret_subgrid;
+
+ /* prolongate the coarser-level correction */
+ mg2di_timer_start(&src->timer_prolong);
+ ret = mg2di_gt_transfer(src->transfer_prolong, src->prolong_tmp,
+ dst->solver->u);
+ mg2di_timer_stop(&src->timer_prolong);
+ if (ret < 0)
+ return ret;
+
+ /* apply the correction */
+ mg2di_timer_start(&src->timer_correct);
+ tp_execute(ctx->priv->tp, src->solver->domain_size[1], coarse_correct_task, src);
+ mg2di_timer_stop(&src->timer_correct);
+
+ /* re-init the current-level solver (re-calc the residual) */
+ mg2di_timer_start(&src->timer_reinit);
+ ret = mg2di_egs_init(src->solver, src->egs_init_flags);
+ mg2di_timer_stop(&src->timer_reinit);
+ if (ret < 0)
+ return ret;
+
+ src->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS;
+
+ return ret_subgrid;
+}
+
static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
{
enum EGSType solve_type = allow_exact && level->solver->domain_size[0] <= ctx->max_exact_size ?
@@ -161,11 +208,13 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
return ret;
level->count_cycles++;
+ ret = 1;
goto finish;
}
for (int i = 0; i < ctx->nb_cycles; i++) {
double res_prev;
+ int nb_relax_post = ctx->nb_relax_post;
/* pre-restrict relaxation */
for (int j = 0; j < ctx->nb_relax_pre; j++) {
@@ -174,52 +223,78 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
"pre-relax", j == ctx->nb_relax_pre - 1 && level->child);
if (ret < 0)
return ret;
- if (res_prev / level->solver->residual_max > 1.1)
- j--;
+ //if (res_prev / level->solver->residual_max > 1.1)
+ // j--;
}
if (level->child) {
- /* restrict the residual as to the coarser-level rhs */
- mg2di_timer_start(&level->timer_restrict);
- ret = mg2di_gt_transfer(level->transfer_restrict, level->child->solver->rhs,
- level->solver->residual);
- mg2di_timer_stop(&level->timer_restrict);
- if (ret < 0)
- return ret;
+ res_prev = level->solver->residual_max;
- /* solve on the coarser level */
- ret = mg_solve_subgrid(ctx, level->child, 1);
+ ret = mg_handle_child(ctx, level, 1);
if (ret < 0)
return ret;
- /* prolongate the coarser-level correction */
- mg2di_timer_start(&level->timer_prolong);
- ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp,
- level->child->solver->u);
- mg2di_timer_stop(&level->timer_prolong);
- if (ret < 0)
- return ret;
+ if (ret == 1 && res_prev / level->solver->residual_max < 0.2 && level->solver->residual_max > ctx->tol) {
+ mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE,
+ "Discarding correct from exact solve %g -> %g (%g)\n",
+ res_prev, level->solver->residual_max,
+ res_prev / level->solver->residual_max);
+
+ level->correct_fact = -1.0;
+ /* unapply the correction */
+ mg2di_timer_start(&level->timer_correct);
+ tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level);
+ mg2di_timer_stop(&level->timer_correct);
+
+ level->correct_fact = 1.0;
+
+ /* re-init the current-level solver (re-calc the residual) */
+ mg2di_timer_start(&level->timer_reinit);
+ ret = mg2di_egs_init(level->solver, level->egs_init_flags);
+ mg2di_timer_stop(&level->timer_reinit);
+ if (ret < 0)
+ return ret;
+
+ ret = mg_handle_child(ctx, level, 0);
+ if (ret < 0)
+ return ret;
+ }
+
+#if 0
+ if (0 && res_prev / level->solver->residual_max < 0.6) {
+ mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE,
+ "Rejecting coarse correct %g -> %g (%g)\n",
+ res_prev, level->solver->residual_max,
+ res_prev / level->solver->residual_max);
- /* apply the correction */
+ level->correct_fact = -1.0;
+ /* unapply the correction */
mg2di_timer_start(&level->timer_correct);
tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level);
mg2di_timer_stop(&level->timer_correct);
+ level->correct_fact = 1.0;
+
/* re-init the current-level solver (re-calc the residual) */
- res_prev = level->solver->residual_max;
mg2di_timer_start(&level->timer_reinit);
ret = mg2di_egs_init(level->solver, level->egs_init_flags);
mg2di_timer_stop(&level->timer_reinit);
if (ret < 0)
return ret;
-
- level->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS;
+ }
+ if (0 && res_prev / level->solver->residual_max < 0.9) {
+ nb_relax_post *= 4 * level->solver->residual_max / res_prev;
+ mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE,
+ "Extra post-relax: %d -> %d\n",
+ ctx->nb_relax_post, nb_relax_post);
+ }
+#endif
log_egs_step(ctx, level, "correct", res_prev, level->solver->residual_max);
}
/* post-correct relaxation */
- for (int j = 0; j < ctx->nb_relax_post; j++) {
+ for (int j = 0; j < nb_relax_post; j++) {
ret = mg_solve(ctx, level, solve_type, "post-relax", 0);
if (ret < 0)
return ret;
@@ -233,12 +308,12 @@ finish:
if (!isfinite(res_new) ||
(res_new > 1e2 * DBL_EPSILON && res_old / res_new <= 1e-2)) {
mg2di_log(&ctx->priv->logger, MG2D_LOG_ERROR,
- "The relaxation step at level %d has diverged: %g -> %g\n",
+ "The step at level %d has diverged: %g -> %g\n",
level->depth, res_old, res_new);
return MG2D_ERR_DIVERGE;
}
- return 0;
+ return ret;
}
static void bnd_zero(MG2DBoundary *bdst, size_t nb_rows, size_t domain_size)
@@ -378,10 +453,11 @@ static int mg_levels_init(MG2DContext *ctx)
}
}
- cur->solver->logger = priv->logger;
- cur->solver->cpuflags = priv->cpuflags;
- cur->solver->tp = priv->tp;
+ cur->correct_fact = 1.0;
+ cur->solver->logger = priv->logger;
+ cur->solver->cpuflags = priv->cpuflags;
+ cur->solver->tp = priv->tp;
cur->solver->fd_stencil = ctx->fd_stencil;
cur->solver->relax->relax_factor = ctx->cfl_factor;
@@ -410,7 +486,7 @@ static int mg_levels_init(MG2DContext *ctx)
if (ctx->fd_stencil == 1)
op_restrict = GRID_TRANSFER_FW_3;
else
- op_restrict = GRID_TRANSFER_FW_5;
+ op_restrict = GRID_TRANSFER_FW_1;
} else
op_restrict = op_interp;
@@ -456,7 +532,7 @@ static int mg_levels_init(MG2DContext *ctx)
if (ret < 0)
return ret;
- ret = restrict_diff_coeffs(ctx, op_interp, cur->child->solver, cur->solver);
+ ret = restrict_diff_coeffs(ctx, GRID_TRANSFER_LAGRANGE_1, cur->child->solver, cur->solver);
if (ret < 0)
return ret;
}
@@ -809,8 +885,9 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
p = buf;
ret = snprintf(p, sizeof(buf) - (p - buf),
- "%2.2f%% level %d: %ld cycles %g s total time %g ms avg per call",
- level_total * 100.0 / priv->timer_solve.time_nsec, level->depth, level->count_cycles,
+ "%2.2f%% level %d (%zup): %ld cycles %g s total time %g ms avg per call",
+ level_total * 100.0 / priv->timer_solve.time_nsec, level->depth,
+ level->solver->domain_size[0], level->count_cycles,
level_total / 1e9, level_total / 1e6 / level->count_cycles);
if (ret > 0)
p += ret;
@@ -833,9 +910,10 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
}
ret = snprintf(p, sizeof(buf) - (p - buf),
- "||%2.2f%% init %2.2f%% residual %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)",
+ "||%2.2f%% init %2.2f%% residual (%g us avg) %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)",
level->solver->timer_init.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
level->solver->timer_res_calc.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
+ level->solver->timer_res_calc.time_nsec / 1e3 / level->solver->timer_res_calc.nb_runs,
level->solver->timer_bnd.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
level->solver->timer_bnd_fixval.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
level->solver->timer_bnd_reflect.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,