summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-06-18 13:01:12 +0200
committerAnton Khirnov <anton@khirnov.net>2020-01-19 10:28:23 +0100
commit3992a96fcd8f4bd63365c1c141184b3370881de6 (patch)
tree0c030f3485b849fa6fcfb396ca9c960d1afc4972
parentec1c614ee67313366d943bdb16f36d61022818b7 (diff)
tmp
-rw-r--r--common.h7
-rw-r--r--ell_grid_solve.c323
2 files changed, 255 insertions, 75 deletions
diff --git a/common.h b/common.h
index 0c644bd..e502853 100644
--- a/common.h
+++ b/common.h
@@ -24,6 +24,13 @@
#define MIN(x, y) ((x) > (y) ? (y) : (x))
#define ARRAY_ELEMS(arr) (sizeof(arr) / sizeof(*arr))
+#define SWAP(x, y, type) \
+do { \
+ type tmp = x; \
+ x = y; \
+ y = tmp; \
+} while (0)
+
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 3f0cb46..ecbf6dc 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -41,6 +41,90 @@
#include "residual_calc.h"
#include "timer.h"
+typedef struct StepControl {
+ /* estimated value for a good step */
+ double hint;
+ /* minimum step size, fail if this value is reached */
+ double step_min;
+
+ double cur_converge;
+ double cur_diverge;
+ double prev_converge;
+ double prev_diverge;
+
+ int64_t diverge_count;
+} StepControl;
+
+static void sc_reset(StepControl *sc)
+{
+ sc->hint = DBL_MAX;
+ sc->cur_converge = DBL_MAX;
+ sc->cur_diverge = DBL_MAX;
+ sc->prev_converge = DBL_MAX;
+ sc->prev_diverge = DBL_MAX;
+}
+
+static void sc_init(StepControl *sc, double hint, double step_min)
+{
+ sc->hint = hint;
+ sc->step_min = step_min;
+ sc->cur_converge = DBL_MAX;
+ sc->cur_diverge = DBL_MAX;
+}
+
+static void sc_step_converge(StepControl *sc, double step, double conv_fact)
+{
+ sc->cur_converge = step;
+ sc->prev_converge = step;
+ mg2di_assert(sc->cur_diverge == DBL_MAX || step < sc->cur_diverge);
+ if (sc->prev_diverge != DBL_MAX && step >= sc->prev_diverge)
+ sc->prev_diverge = DBL_MAX;
+}
+
+static void sc_step_diverge(StepControl *sc, double step)
+{
+ sc->cur_diverge = step;
+ sc->prev_diverge = step;
+ if (sc->cur_converge != DBL_MAX && step <= sc->cur_converge)
+ sc->cur_converge = DBL_MAX;
+ if (sc->prev_converge != DBL_MAX && step <= sc->prev_converge)
+ sc->prev_converge = DBL_MAX;
+
+ sc->diverge_count++;
+}
+
+static double sc_step_get(StepControl *sc)
+{
+ double step = 0.25;
+ double guess_converge = sc->cur_converge == DBL_MAX ? sc->prev_converge : sc->cur_converge;
+ double guess_diverge = sc->cur_diverge == DBL_MAX ? sc->prev_diverge : sc->cur_diverge;
+
+ if (guess_converge != DBL_MAX && guess_diverge != DBL_MAX) {
+ if (guess_diverge - guess_converge < 0.1 * guess_diverge)
+ step = guess_converge;
+ else
+ step = 0.5 * (guess_diverge + guess_converge);
+ } else if (guess_diverge != DBL_MAX) {
+ step = 0.8 * guess_diverge;
+ } else if (guess_converge != DBL_MAX) {
+ step = 1.1 * guess_converge;
+ } else if (sc->hint != DBL_MAX)
+ step = sc->hint;
+
+ if (sc->step_min != DBL_MAX && step < sc->step_min) {
+ fprintf(stderr, "%g %g %g %g %g\n", sc->cur_converge, sc->prev_converge, sc->cur_diverge, sc->prev_diverge, sc->hint);
+ mg2di_assert(0);
+ return -1.0;
+ }
+
+ return step;
+}
+
+static int sc_step_confirm(StepControl *sc, double conv_factor)
+{
+ return 0;
+}
+
static const double relax_factors[FD_STENCIL_MAX] = {
[0] = 1.0 / 5,
[1] = 1.0 / 7,
@@ -76,16 +160,22 @@ typedef struct EGSExactInternal {
BiCGStabContext *bicgstab;
} EGSExactInternal;
+#define HIST_SIZE 3
+
struct EGSInternal {
NDArray *rhs_base;
NDArray *residual_base;
NDArray *u_base;
+ NDArray *u_exterior[HIST_SIZE];
+ NDArray *u_interior[HIST_SIZE];
+ int u_iteration[HIST_SIZE];
+
+ double residual_max[HIST_SIZE];
+ double step[HIST_SIZE];
+ int residual_iteration;
- NDArray *u_next_base;
- NDArray *u_next_exterior;
- NDArray *u_next;
- int u_next_valid;
+ int iteration_cur;
/* all the diff coeffs concatenated */
NDArray *diff_coeffs_public_base;
@@ -108,6 +198,8 @@ struct EGSInternal {
TPContext *tp_internal;
+ StepControl sc;
+
MPI_Comm comm;
DomainGeometry *dg;
unsigned int local_component;
@@ -175,13 +267,14 @@ static void boundaries_sync(EGSContext *ctx, NDArray *a_dst)
}
}
-static void residual_calc(EGSContext *ctx, int export_res)
+static void residual_calc(EGSContext *ctx, double *residual_max, NDArray *dst_add,
+ NDArray *src, double step)
{
EGSInternal *priv = ctx->priv;
const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
ptrdiff_t *offset = priv->residual_calc_offset[priv->steps_since_sync];
size_t *size = priv->residual_calc_size[priv->steps_since_sync];
- NDArray *dst = export_res ? ctx->residual : priv->u_next;
+ NDArray *dst = dst_add ? dst_add : ctx->residual;
int reflect_flags = 0;
mg2di_timer_start(&ctx->timer_res_calc);
@@ -194,19 +287,19 @@ static void residual_calc(EGSContext *ctx, int export_res)
priv->dg->components[priv->local_component].bnd_is_outer[bnd_idx])
reflect_flags |= 1 << bnd_idx;
- mg2di_residual_calc(priv->rescalc, size, &ctx->residual_max,
+ step *= ctx->step[0] * ctx->step[1];
+ mg2di_residual_calc(priv->rescalc, size, residual_max,
NDPTR2D(dst, offset[0], offset[1]), dst->stride[0],
- NDPTR2D(ctx->u, offset[0], offset[1]), ctx->u->stride[0],
+ NDPTR2D(src, offset[0], offset[1]), src->stride[0],
NDPTR2D(ctx->rhs, offset[0], offset[1]), ctx->rhs->stride[0],
diff_coeffs, priv->diff_coeffs[0]->stride[0],
- export_res ? 0.0 : 1.0, export_res ? 1.0 : priv->r.relax_factor,
- reflect_flags, FD_STENCIL_MAX);
+ dst_add ? 1.0 : 0.0, dst_add ? step : 1.0, reflect_flags, FD_STENCIL_MAX);
mg2di_timer_stop(&ctx->timer_res_calc);
if (priv->dg->nb_components > 1) {
mg2di_timer_start(&ctx->timer_mpi_sync);
- MPI_Allreduce(MPI_IN_PLACE, &ctx->residual_max, 1,
+ MPI_Allreduce(MPI_IN_PLACE, residual_max, 1,
MPI_DOUBLE, MPI_MAX, priv->comm);
mg2di_timer_stop(&ctx->timer_mpi_sync);
}
@@ -390,7 +483,8 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa
EGSInternal *priv = ctx->priv;
priv->r.line_add(ctx->domain_size[0], ctx->u->data + ctx->u->stride[0] * job_idx,
- ctx->residual->data + ctx->residual->stride[0] * job_idx, priv->r.relax_factor);
+ ctx->residual->data + ctx->residual->stride[0] * job_idx,
+ priv->r.relax_factor * ctx->step[0] * ctx->step[1]);
#else
for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) {
ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0;
@@ -406,31 +500,33 @@ static int solve_relax_step(EGSContext *ctx, int export_res)
{
EGSInternal *priv = ctx->priv;
EGSRelaxContext *r = ctx->relax;
-
- int u_next_valid = priv->u_next_valid;
-
- if (u_next_valid) {
- NDArray *tmp = ctx->u;
- NDArray *tmp_ext = ctx->u_exterior;
- NDArray *tmp_base = priv->u_base;
- ctx->u = priv->u_next;
- ctx->u_exterior = priv->u_next_exterior;
- priv->u_base = priv->u_next_base;
- priv->u_next = tmp;
- priv->u_next_exterior = tmp_ext;
- priv->u_next_base = tmp_base;
- priv->u_next_valid = 0;
- }
+ int ic = priv->iteration_cur;
if (export_res) {
- if (!u_next_valid) {
+ if (priv->u_iteration[(ic + 1) % HIST_SIZE] < ic + 1) {
+ if (priv->residual_iteration < ic) {
+ residual_calc(ctx, &priv->residual_max[ic % HIST_SIZE], NULL, priv->u_interior[ic % HIST_SIZE], 0);
+ priv->residual_iteration = ic;
+ }
+ mg2di_assert(priv->residual_iteration == ic);
+
+ priv->step[ic % HIST_SIZE] = sc_step_get(&priv->sc);
+ if (priv->step[ic % HIST_SIZE] < 0) {
+ mg2di_log(&ctx->logger, MG2D_LOG_ERROR,
+ "Minimum step reached.");
+ return -EDOM;
+ }
+ priv->r.relax_factor = priv->step[0];
+
+ abort();
mg2di_timer_start(&r->timer_correct);
tp_execute(ctx->tp, ctx->domain_size[1], residual_add_task, ctx);
mg2di_timer_stop(&r->timer_correct);
priv->reflect_skip = 0;
- boundaries_apply(ctx, ctx->u, 0);
+ boundaries_apply(ctx, priv->u_interior[(ic + 1) % HIST_SIZE], 0);
+ priv->u_iteration[(ic + 1) % HIST_SIZE] = ic + 1;
priv->steps_since_sync++;
if (priv->steps_since_sync >= priv->steps_per_sync) {
@@ -438,22 +534,81 @@ static int solve_relax_step(EGSContext *ctx, int export_res)
priv->steps_since_sync = 0;
}
}
+ mg2di_assert(priv->u_iteration[(ic + 1) % HIST_SIZE] == ic + 1);
- residual_calc(ctx, 1);
+ residual_calc(ctx, &priv->residual_max[(ic + 1) % HIST_SIZE], NULL,
+ priv->u_interior[(ic + 1) % HIST_SIZE], 0);
boundaries_sync(ctx, ctx->residual);
+ priv->residual_iteration = ic + 1;
} else {
- mg2di_assert(u_next_valid);
- residual_calc(ctx, 0);
- boundaries_apply(ctx, priv->u_next, 0);
+ double res_old, res_new;
+ int diverged;
+
+ mg2di_assert(priv->u_iteration[(ic + 1) % HIST_SIZE] == ic + 1);
+ res_old = priv->residual_max[ic % HIST_SIZE];
+
+ priv->step[(ic + 1) % HIST_SIZE] = sc_step_get(&priv->sc);
+ if (priv->step[(ic + 1) % HIST_SIZE] < 0) {
+ mg2di_log(&ctx->logger, MG2D_LOG_ERROR,
+ "Minimum step reached.");
+ return -EDOM;
+ }
+
+ residual_calc(ctx, &priv->residual_max[(ic + 1) % HIST_SIZE], priv->u_interior[(ic + 2) % HIST_SIZE],
+ priv->u_interior[(ic + 1) % HIST_SIZE], priv->step[(ic + 1) % HIST_SIZE]);
+
+ res_new = priv->residual_max[(ic + 1) % HIST_SIZE];
+ diverged = res_new > 1e-6 && res_new / res_old - 1.0 >= 1e-2 ;
+ while (diverged) {
+ double tmp, step;
+
+ fprintf(stderr, "%zu diverge step %g fact %g\n", ctx->domain_size[0],
+ priv->step[(ic + 1) % HIST_SIZE], res_new / res_old);
+
+ sc_step_diverge(&priv->sc, priv->step[ic % HIST_SIZE]);
+
+ priv->step[ic % HIST_SIZE] = sc_step_get(&priv->sc);
+ if (priv->step[(ic) % HIST_SIZE] < 0) {
+ mg2di_log(&ctx->logger, MG2D_LOG_ERROR,
+ "Minimum step reached.");
+ return -EDOM;
+ }
+ residual_calc(ctx, &tmp, priv->u_interior[(ic + 1) % HIST_SIZE],
+ priv->u_interior[ic % HIST_SIZE], priv->step[ic % HIST_SIZE]);
+
+ priv->step[(ic + 1) % HIST_SIZE] = priv->step[ic % HIST_SIZE];
+ residual_calc(ctx, &priv->residual_max[(ic + 1) % HIST_SIZE], priv->u_interior[(ic + 2) % HIST_SIZE],
+ priv->u_interior[(ic + 1) % HIST_SIZE], priv->step[(ic + 1) % HIST_SIZE]);
+ res_new = priv->residual_max[(ic + 1) % HIST_SIZE];
+ diverged = res_new > 1e-6 && res_new >= res_old;
+ }
+
+ sc_step_converge(&priv->sc, priv->step[ic % HIST_SIZE], res_old / res_new);
+ boundaries_apply(ctx, priv->u_interior[(ic + 2) % HIST_SIZE], 0);
+ priv->u_iteration[(ic + 2) % HIST_SIZE] = ic + 2;
priv->steps_since_sync++;
if (priv->steps_since_sync >= priv->steps_per_sync) {
- boundaries_sync(ctx, priv->u_next);
+ boundaries_sync(ctx, priv->u_interior[(ic + 2) % HIST_SIZE]);
priv->steps_since_sync = 0;
}
- priv->u_next_valid = 1;
+
+ {
+ static int i;
+ if (!(i & 1023)) {
+ fprintf(stderr, "%zu converge %g diverge %g guess %g diverges %ld\n", ctx->domain_size[0], priv->sc.cur_converge,
+ priv->sc.prev_diverge, ctx->relax->relax_factor * ctx->relax->relax_multiplier, priv->sc.diverge_count);
+ }
+ i++;
+ }
}
+ ic++;
+ ctx->u = priv->u_interior [ic % HIST_SIZE];
+ ctx->u_exterior = priv->u_exterior [ic % HIST_SIZE];
+ ctx->residual_max = priv->residual_max[ic % HIST_SIZE];
+ priv->iteration_cur = ic;
+
return 0;
}
@@ -1048,7 +1203,7 @@ static int solve_exact(EGSContext *ctx)
priv->reflect_skip = 0;
boundaries_apply(ctx, ctx->u, 0);
- residual_calc(ctx, 1);
+ residual_calc(ctx, &ctx->residual_max, NULL, ctx->u, 0);
return 0;
}
@@ -1185,15 +1340,6 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
goto finish;
}
- if (r->relax_factor == 0.0)
- priv->r.relax_factor = relax_factors[ctx->fd_stencil - 1];
- else
- priv->r.relax_factor = r->relax_factor;
- priv->r.relax_factor *= ctx->step[0] * ctx->step[0];
-
- if (r->relax_multiplier > 0.0)
- priv->r.relax_factor *= r->relax_multiplier;
-
priv->r.line_add = line_madd_c;
#if HAVE_EXTERNAL_ASM
if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3)
@@ -1228,6 +1374,11 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
arg.dc = MG2D_DIFF_COEFF_11;
arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]);
tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg);
+
+ if (r->relax_factor > 0.0) {
+ double cfl_step = r->relax_factor * r->relax_multiplier;
+ sc_init(&priv->sc, cfl_step, cfl_step * 1e-3);
+ }
}
if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS))
@@ -1303,6 +1454,16 @@ int mg2di_egs_init(EGSContext *ctx, int flags)
if (ret < 0)
goto finish;
+ SWAP(priv->u_exterior[0], priv->u_exterior[priv->iteration_cur % ARRAY_ELEMS(priv->u_exterior)], NDArray*);
+ SWAP(priv->u_interior[0], priv->u_interior[priv->iteration_cur % ARRAY_ELEMS(priv->u_interior)], NDArray*);
+ priv->iteration_cur = 0;
+ priv->u_iteration[0] = 0;
+ priv->residual_iteration = -1;
+ for (int i = 1; i < ARRAY_ELEMS(priv->u_iteration); i++) {
+ priv->u_iteration[i] = -1;
+ priv->residual_max[i] = -1;
+ }
+
finish:
mg2di_timer_stop(&ctx->timer_init);
@@ -1315,22 +1476,29 @@ finish:
}
boundaries_sync(ctx, ctx->rhs);
- boundaries_apply(ctx, ctx->u, 1);
- boundaries_sync(ctx, ctx->u);
+ for (int i = 0; i < ARRAY_ELEMS(priv->u_interior); i++) {
+ boundaries_apply(ctx, priv->u_interior[i], 1);
+ boundaries_sync(ctx, priv->u_interior[i]);
+ }
priv->steps_since_sync = 0;
- boundaries_apply(ctx, priv->u_next, 1);
- boundaries_sync(ctx, priv->u_next);
+ priv->step[0] = sc_step_get(&priv->sc);
+ if (priv->step[0] < 0) {
+ mg2di_log(&ctx->logger, MG2D_LOG_ERROR,
+ "Minimum step reached. 0");
+ return -EDOM;
+ }
- residual_calc(ctx, 0);
- boundaries_apply(ctx, priv->u_next, 0);
+ residual_calc(ctx, &priv->residual_max[0], priv->u_interior[1], priv->u_interior[0], priv->step[0]);
+ boundaries_apply(ctx, priv->u_interior[1], 0);
+ priv->u_iteration[1] = 1;
+ ctx->residual_max = priv->residual_max[0];
priv->steps_since_sync++;
if (priv->steps_since_sync >= priv->steps_per_sync) {
- boundaries_sync(ctx, priv->u_next);
+ boundaries_sync(ctx, priv->u_interior[1]);
priv->steps_since_sync = 0;
}
- priv->u_next_valid = 1;
}
mg2di_timer_stop(&ctx->timer_solve);
@@ -1347,7 +1515,7 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
Slice slice_exterior[2];
size_t size_alloc[2];
- size_t size_dc[2];
+ size_t size_dc[2], size_u[2];
size_t extra_points[4];
int ret;
@@ -1372,26 +1540,30 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2])
slice_interior[1].end = slice_interior[1].start + dc->interior.size[0];
slice_interior[1].step = 1;
- ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_alloc, NDARRAY_ALLOC_ZERO);
- if (ret < 0)
- return ret;
- ret = mg2di_ndarray_alloc(&priv->u_next_base, 2, size_alloc, NDARRAY_ALLOC_ZERO);
- if (ret < 0)
- return ret;
+ size_u[0] = size_alloc[0] * HIST_SIZE;
+ size_u[1] = size_alloc[1];
- ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice_interior);
- if (ret < 0)
- return ret;
- ret = mg2di_ndarray_slice(&priv->u_next, priv->u_next_base, slice_interior);
+ ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_u, NDARRAY_ALLOC_ZERO);
if (ret < 0)
return ret;
- ret = mg2di_ndarray_slice(&ctx->u_exterior, priv->u_base, slice_exterior);
- if (ret < 0)
- return ret;
- ret = mg2di_ndarray_slice(&priv->u_next_exterior, priv->u_next_base, slice_exterior);
- if (ret < 0)
- return ret;
+ for (int i = 0; i < ARRAY_ELEMS(priv->u_exterior); i++) {
+ const Slice slice_u[2] = { SLICE(i * size_alloc[0], (i + 1) * size_alloc[0], 1), SLICE_NULL };
+ NDArray *tmp;
+
+ ret = mg2di_ndarray_slice(&tmp, priv->u_base, slice_u);
+ if (ret < 0)
+ return ret;
+
+ ret = mg2di_ndarray_slice(&priv->u_exterior[i], tmp, slice_exterior);
+ if (ret >= 0)
+ ret = mg2di_ndarray_slice(&priv->u_interior[i], tmp, slice_interior);
+ mg2di_ndarray_free(&tmp);
+ if (ret < 0)
+ return ret;
+ }
+ ctx->u = priv->u_interior[0];
+ ctx->u_exterior = priv->u_exterior[0];
ret = mg2di_ndarray_alloc(&priv->rhs_base, 2, size_alloc, 0);
if (ret < 0)
@@ -1522,6 +1694,8 @@ static EGSContext *egs_alloc(const DomainGeometry *dg, unsigned int local_compon
if (!ctx->priv->rescalc)
goto fail;
+ sc_reset(&ctx->priv->sc);
+
mg2di_timer_init(&ctx->timer_bnd);
mg2di_timer_init(&ctx->timer_bnd_fixval);
mg2di_timer_init(&ctx->timer_bnd_falloff);
@@ -1601,12 +1775,11 @@ void mg2di_egs_free(EGSContext **pctx)
exact_arrays_free(ctx);
- mg2di_ndarray_free(&ctx->u);
- mg2di_ndarray_free(&ctx->u_exterior);
+ for (int i = 0; i < ARRAY_ELEMS(ctx->priv->u_exterior); i++) {
+ mg2di_ndarray_free(&ctx->priv->u_exterior[i]);
+ mg2di_ndarray_free(&ctx->priv->u_interior[i]);
+ }
mg2di_ndarray_free(&ctx->priv->u_base);
- mg2di_ndarray_free(&ctx->priv->u_next);
- mg2di_ndarray_free(&ctx->priv->u_next_exterior);
- mg2di_ndarray_free(&ctx->priv->u_next_base);
mg2di_ndarray_free(&ctx->rhs);
mg2di_ndarray_free(&ctx->priv->rhs_base);