summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-05-06 18:21:20 +0200
committerAnton Khirnov <anton@khirnov.net>2019-05-16 18:25:31 +0200
commit83f009f5ee35f7ebce3059155859770accc53902 (patch)
treecf1cad2731dac6913054966d3fc37c18ec3185ed
parent6455697c307442aba4e981c26e3eb3f84f5a65cb (diff)
-rw-r--r--ell_grid_solve.c9
-rw-r--r--meson.build2
-rw-r--r--mg2d.c146
-rw-r--r--mg2d_test.c17
4 files changed, 131 insertions, 43 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index f8512f6..978c26f 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -171,11 +171,12 @@ static void boundaries_apply_reflect(double *dst, const ptrdiff_t dst_stride[2],
for (int j = 1; j <= FD_STENCIL_MAX; j++)
memcpy(dst + j * dst_stride[1], dst - j * dst_stride[1], sizeof(*dst) * boundary_size);
} else {
+//#pragma omp parallel for
for (size_t i = 0; i < boundary_size; i++) {
for (int j = 1; j <= FD_STENCIL_MAX; j++)
- dst[dst_stride[1] * j] = dst[-dst_stride[1] * j];
+ dst[dst_stride[0] * i + dst_stride[1] * j] = dst[dst_stride[0] * i -dst_stride[1] * j];
- dst += dst_stride[0];
+ //dst += dst_stride[0];
}
}
}
@@ -304,7 +305,7 @@ static void line_madd_c(ptrdiff_t linesize, double *dst, const double *src, doub
dst[i] += c * src[i];
}
-static int residual_add_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
+static inline int residual_add_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
{
#if 1
EGSContext *ctx = arg;
@@ -313,6 +314,8 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa
priv->r.line_add(ctx->domain_size[0], ctx->u->data + ctx->u->stride[0] * job_idx,
ctx->residual->data + ctx->residual->stride[0] * job_idx, priv->r.relax_factor);
#else
+ EGSContext *ctx = arg;
+ EGSInternal *priv = ctx->priv;
for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0;
diff --git a/meson.build b/meson.build
index 0c55f2e..e070883 100644
--- a/meson.build
+++ b/meson.build
@@ -26,7 +26,9 @@ libcblas = cc.find_library('blas')
liblapacke = cc.find_library('lapacke')
dep_tp = declare_dependency(link_args : '-lthreadpool')
+dep_omp = dependency('openmp')
+#deps = [dep_tp, libm, libcblas, liblapacke, dep_omp]
deps = [dep_tp, libm, libcblas, liblapacke]
cdata = configuration_data()
diff --git a/mg2d.c b/mg2d.c
index 7cdf7bc..ef7182d 100644
--- a/mg2d.c
+++ b/mg2d.c
@@ -51,6 +51,8 @@ typedef struct MG2DLevel {
struct MG2DLevel *child;
+ double correct_fact;
+
/* timings */
int64_t count_cycles;
Timer timer_solve;
@@ -104,7 +106,7 @@ static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thr
for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) {
const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0;
const ptrdiff_t idx_src = job_idx * level->prolong_tmp->stride[0] + idx0;
- level->solver->u->data[idx_dst] -= level->prolong_tmp->data[idx_src];
+ level->solver->u->data[idx_dst] -= level->correct_fact * level->prolong_tmp->data[idx_src];
}
return 0;
}
@@ -129,6 +131,51 @@ static int mg_solve(MG2DContext *ctx, MG2DLevel *level, enum EGSType solve_type,
return 0;
}
+static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact);
+
+static int mg_handle_child(MG2DContext *ctx, MG2DLevel *src, int allow_exact)
+{
+ MG2DLevel *dst = src->child;
+ int ret, ret_subgrid;
+
+ /* restrict the residual as to the coarser-level rhs */
+ mg2di_timer_start(&src->timer_restrict);
+ ret = mg2di_gt_transfer(src->transfer_restrict, dst->solver->rhs,
+ src->solver->residual);
+ mg2di_timer_stop(&src->timer_restrict);
+ if (ret < 0)
+ return ret;
+
+ /* solve on the coarser level */
+ ret_subgrid = mg_solve_subgrid(ctx, dst, allow_exact);
+ if (ret_subgrid < 0)
+ return ret_subgrid;
+
+ /* prolongate the coarser-level correction */
+ mg2di_timer_start(&src->timer_prolong);
+ ret = mg2di_gt_transfer(src->transfer_prolong, src->prolong_tmp,
+ dst->solver->u);
+ mg2di_timer_stop(&src->timer_prolong);
+ if (ret < 0)
+ return ret;
+
+ /* apply the correction */
+ mg2di_timer_start(&src->timer_correct);
+ tp_execute(ctx->priv->tp, src->solver->domain_size[1], coarse_correct_task, src);
+ mg2di_timer_stop(&src->timer_correct);
+
+ /* re-init the current-level solver (re-calc the residual) */
+ mg2di_timer_start(&src->timer_reinit);
+ ret = mg2di_egs_init(src->solver, src->egs_init_flags);
+ mg2di_timer_stop(&src->timer_reinit);
+ if (ret < 0)
+ return ret;
+
+ src->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS;
+
+ return ret_subgrid;
+}
+
static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
{
enum EGSType solve_type = allow_exact && level->solver->domain_size[0] <= ctx->max_exact_size ?
@@ -161,11 +208,13 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
return ret;
level->count_cycles++;
+ ret = 1;
goto finish;
}
for (int i = 0; i < ctx->nb_cycles; i++) {
double res_prev;
+ int nb_relax_post = ctx->nb_relax_post;
/* pre-restrict relaxation */
for (int j = 0; j < ctx->nb_relax_pre; j++) {
@@ -174,52 +223,78 @@ static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level, int allow_exact)
"pre-relax", j == ctx->nb_relax_pre - 1 && level->child);
if (ret < 0)
return ret;
- if (res_prev / level->solver->residual_max > 1.1)
- j--;
+ //if (res_prev / level->solver->residual_max > 1.1)
+ // j--;
}
if (level->child) {
- /* restrict the residual as to the coarser-level rhs */
- mg2di_timer_start(&level->timer_restrict);
- ret = mg2di_gt_transfer(level->transfer_restrict, level->child->solver->rhs,
- level->solver->residual);
- mg2di_timer_stop(&level->timer_restrict);
- if (ret < 0)
- return ret;
+ res_prev = level->solver->residual_max;
- /* solve on the coarser level */
- ret = mg_solve_subgrid(ctx, level->child, 1);
+ ret = mg_handle_child(ctx, level, 1);
if (ret < 0)
return ret;
- /* prolongate the coarser-level correction */
- mg2di_timer_start(&level->timer_prolong);
- ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp,
- level->child->solver->u);
- mg2di_timer_stop(&level->timer_prolong);
- if (ret < 0)
- return ret;
+ if (ret == 1 && res_prev / level->solver->residual_max < 0.2 && level->solver->residual_max > ctx->tol) {
+ mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE,
+ "Discarding correct from exact solve %g -> %g (%g)\n",
+ res_prev, level->solver->residual_max,
+ res_prev / level->solver->residual_max);
+
+ level->correct_fact = -1.0;
+ /* unapply the correction */
+ mg2di_timer_start(&level->timer_correct);
+ tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level);
+ mg2di_timer_stop(&level->timer_correct);
+
+ level->correct_fact = 1.0;
+
+ /* re-init the current-level solver (re-calc the residual) */
+ mg2di_timer_start(&level->timer_reinit);
+ ret = mg2di_egs_init(level->solver, level->egs_init_flags);
+ mg2di_timer_stop(&level->timer_reinit);
+ if (ret < 0)
+ return ret;
+
+ ret = mg_handle_child(ctx, level, 0);
+ if (ret < 0)
+ return ret;
+ }
+
+#if 0
+ if (0 && res_prev / level->solver->residual_max < 0.6) {
+ mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE,
+ "Rejecting coarse correct %g -> %g (%g)\n",
+ res_prev, level->solver->residual_max,
+ res_prev / level->solver->residual_max);
- /* apply the correction */
+ level->correct_fact = -1.0;
+ /* unapply the correction */
mg2di_timer_start(&level->timer_correct);
tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level);
mg2di_timer_stop(&level->timer_correct);
+ level->correct_fact = 1.0;
+
/* re-init the current-level solver (re-calc the residual) */
- res_prev = level->solver->residual_max;
mg2di_timer_start(&level->timer_reinit);
ret = mg2di_egs_init(level->solver, level->egs_init_flags);
mg2di_timer_stop(&level->timer_reinit);
if (ret < 0)
return ret;
-
- level->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS;
+ }
+ if (0 && res_prev / level->solver->residual_max < 0.9) {
+ nb_relax_post *= 4 * level->solver->residual_max / res_prev;
+ mg2di_log(&ctx->priv->logger, MG2D_LOG_VERBOSE,
+ "Extra post-relax: %d -> %d\n",
+ ctx->nb_relax_post, nb_relax_post);
+ }
+#endif
log_egs_step(ctx, level, "correct", res_prev, level->solver->residual_max);
}
/* post-correct relaxation */
- for (int j = 0; j < ctx->nb_relax_post; j++) {
+ for (int j = 0; j < nb_relax_post; j++) {
ret = mg_solve(ctx, level, solve_type, "post-relax", 0);
if (ret < 0)
return ret;
@@ -233,12 +308,12 @@ finish:
if (!isfinite(res_new) ||
(res_new > 1e2 * DBL_EPSILON && res_old / res_new <= 1e-2)) {
mg2di_log(&ctx->priv->logger, MG2D_LOG_ERROR,
- "The relaxation step at level %d has diverged: %g -> %g\n",
+ "The step at level %d has diverged: %g -> %g\n",
level->depth, res_old, res_new);
return MG2D_ERR_DIVERGE;
}
- return 0;
+ return ret;
}
static void bnd_zero(MG2DBoundary *bdst, size_t nb_rows, size_t domain_size)
@@ -378,10 +453,11 @@ static int mg_levels_init(MG2DContext *ctx)
}
}
- cur->solver->logger = priv->logger;
- cur->solver->cpuflags = priv->cpuflags;
- cur->solver->tp = priv->tp;
+ cur->correct_fact = 1.0;
+ cur->solver->logger = priv->logger;
+ cur->solver->cpuflags = priv->cpuflags;
+ cur->solver->tp = priv->tp;
cur->solver->fd_stencil = ctx->fd_stencil;
cur->solver->relax->relax_factor = ctx->cfl_factor;
@@ -410,7 +486,7 @@ static int mg_levels_init(MG2DContext *ctx)
if (ctx->fd_stencil == 1)
op_restrict = GRID_TRANSFER_FW_3;
else
- op_restrict = GRID_TRANSFER_FW_5;
+ op_restrict = GRID_TRANSFER_FW_1;
} else
op_restrict = op_interp;
@@ -456,7 +532,7 @@ static int mg_levels_init(MG2DContext *ctx)
if (ret < 0)
return ret;
- ret = restrict_diff_coeffs(ctx, op_interp, cur->child->solver, cur->solver);
+ ret = restrict_diff_coeffs(ctx, GRID_TRANSFER_LAGRANGE_1, cur->child->solver, cur->solver);
if (ret < 0)
return ret;
}
@@ -809,8 +885,9 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
p = buf;
ret = snprintf(p, sizeof(buf) - (p - buf),
- "%2.2f%% level %d: %ld cycles %g s total time %g ms avg per call",
- level_total * 100.0 / priv->timer_solve.time_nsec, level->depth, level->count_cycles,
+ "%2.2f%% level %d (%zup): %ld cycles %g s total time %g ms avg per call",
+ level_total * 100.0 / priv->timer_solve.time_nsec, level->depth,
+ level->solver->domain_size[0], level->count_cycles,
level_total / 1e9, level_total / 1e6 / level->count_cycles);
if (ret > 0)
p += ret;
@@ -833,9 +910,10 @@ void mg2d_print_stats(MG2DContext *ctx, const char *prefix)
}
ret = snprintf(p, sizeof(buf) - (p - buf),
- "||%2.2f%% init %2.2f%% residual %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)",
+ "||%2.2f%% init %2.2f%% residual (%g us avg) %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)",
level->solver->timer_init.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
level->solver->timer_res_calc.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
+ level->solver->timer_res_calc.time_nsec / 1e3 / level->solver->timer_res_calc.nb_runs,
level->solver->timer_bnd.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
level->solver->timer_bnd_fixval.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
level->solver->timer_bnd_reflect.time_nsec * 100.0 / level->solver->timer_solve.time_nsec,
diff --git a/mg2d_test.c b/mg2d_test.c
index 7f6654d..512f747 100644
--- a/mg2d_test.c
+++ b/mg2d_test.c
@@ -10,8 +10,8 @@
#define ARRAY_ELEMS(x) (sizeof(x) / sizeof(*x))
-#define MAXITER (1 << 18)
-#define TOL 1e-2
+#define MAXITER (1 << 15)
+#define TOL 1e-9
#define DOMAIN_SIZE 1.0
@@ -93,6 +93,8 @@ int main(int argc, char **argv)
long int gridsize;
int ret = 0;
+ const char *omp_threads = getenv("OMP_NUM_THREADS");
+
if (argc < 2) {
fprintf(stderr, "Usage: %s <N>\n", argv[0]);
return 1;
@@ -119,9 +121,13 @@ int main(int argc, char **argv)
ctx->nb_cycles = 1;
ctx->nb_relax_post = 2;
ctx->tol = TOL;
- ctx->nb_threads = 1;
- ctx->log_level = MG2D_LOG_INFO;
- ctx->max_levels = 1;
+ ctx->log_level = MG2D_LOG_VERBOSE;
+ //ctx->max_levels = 4;
+ ctx->max_exact_size = 33;
+ if (omp_threads)
+ ctx->nb_threads = strtol(omp_threads, NULL, 0);
+ if (ctx->nb_threads <= 0)
+ ctx->nb_threads = 1;
for (int bnd_loc = 0; bnd_loc < ARRAY_ELEMS(ctx->boundaries); bnd_loc++) {
MG2DBoundary *bnd = ctx->boundaries[bnd_loc];
@@ -173,7 +179,6 @@ int main(int argc, char **argv)
goto fail;
}
- ctx->log_level = MG2D_LOG_VERBOSE;
mg2d_print_stats(ctx, NULL);
{