From 3992a96fcd8f4bd63365c1c141184b3370881de6 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 18 Jun 2019 13:01:12 +0200 Subject: tmp --- common.h | 7 ++ ell_grid_solve.c | 323 ++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 255 insertions(+), 75 deletions(-) diff --git a/common.h b/common.h index 0c644bd..e502853 100644 --- a/common.h +++ b/common.h @@ -24,6 +24,13 @@ #define MIN(x, y) ((x) > (y) ? (y) : (x)) #define ARRAY_ELEMS(arr) (sizeof(arr) / sizeof(*arr)) +#define SWAP(x, y, type) \ +do { \ + type tmp = x; \ + x = y; \ + y = tmp; \ +} while (0) + #include #include #include diff --git a/ell_grid_solve.c b/ell_grid_solve.c index 3f0cb46..ecbf6dc 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -41,6 +41,90 @@ #include "residual_calc.h" #include "timer.h" +typedef struct StepControl { + /* estimated value for a good step */ + double hint; + /* minimum step size, fail if this value is reached */ + double step_min; + + double cur_converge; + double cur_diverge; + double prev_converge; + double prev_diverge; + + int64_t diverge_count; +} StepControl; + +static void sc_reset(StepControl *sc) +{ + sc->hint = DBL_MAX; + sc->cur_converge = DBL_MAX; + sc->cur_diverge = DBL_MAX; + sc->prev_converge = DBL_MAX; + sc->prev_diverge = DBL_MAX; +} + +static void sc_init(StepControl *sc, double hint, double step_min) +{ + sc->hint = hint; + sc->step_min = step_min; + sc->cur_converge = DBL_MAX; + sc->cur_diverge = DBL_MAX; +} + +static void sc_step_converge(StepControl *sc, double step, double conv_fact) +{ + sc->cur_converge = step; + sc->prev_converge = step; + mg2di_assert(sc->cur_diverge == DBL_MAX || step < sc->cur_diverge); + if (sc->prev_diverge != DBL_MAX && step >= sc->prev_diverge) + sc->prev_diverge = DBL_MAX; +} + +static void sc_step_diverge(StepControl *sc, double step) +{ + sc->cur_diverge = step; + sc->prev_diverge = step; + if (sc->cur_converge != DBL_MAX && step <= sc->cur_converge) + sc->cur_converge = DBL_MAX; + if (sc->prev_converge != DBL_MAX && step <= sc->prev_converge) + sc->prev_converge = DBL_MAX; + + sc->diverge_count++; +} + +static double sc_step_get(StepControl *sc) +{ + double step = 0.25; + double guess_converge = sc->cur_converge == DBL_MAX ? sc->prev_converge : sc->cur_converge; + double guess_diverge = sc->cur_diverge == DBL_MAX ? sc->prev_diverge : sc->cur_diverge; + + if (guess_converge != DBL_MAX && guess_diverge != DBL_MAX) { + if (guess_diverge - guess_converge < 0.1 * guess_diverge) + step = guess_converge; + else + step = 0.5 * (guess_diverge + guess_converge); + } else if (guess_diverge != DBL_MAX) { + step = 0.8 * guess_diverge; + } else if (guess_converge != DBL_MAX) { + step = 1.1 * guess_converge; + } else if (sc->hint != DBL_MAX) + step = sc->hint; + + if (sc->step_min != DBL_MAX && step < sc->step_min) { + fprintf(stderr, "%g %g %g %g %g\n", sc->cur_converge, sc->prev_converge, sc->cur_diverge, sc->prev_diverge, sc->hint); + mg2di_assert(0); + return -1.0; + } + + return step; +} + +static int sc_step_confirm(StepControl *sc, double conv_factor) +{ + return 0; +} + static const double relax_factors[FD_STENCIL_MAX] = { [0] = 1.0 / 5, [1] = 1.0 / 7, @@ -76,16 +160,22 @@ typedef struct EGSExactInternal { BiCGStabContext *bicgstab; } EGSExactInternal; +#define HIST_SIZE 3 + struct EGSInternal { NDArray *rhs_base; NDArray *residual_base; NDArray *u_base; + NDArray *u_exterior[HIST_SIZE]; + NDArray *u_interior[HIST_SIZE]; + int u_iteration[HIST_SIZE]; + + double residual_max[HIST_SIZE]; + double step[HIST_SIZE]; + int residual_iteration; - NDArray *u_next_base; - NDArray *u_next_exterior; - NDArray *u_next; - int u_next_valid; + int iteration_cur; /* all the diff coeffs concatenated */ NDArray *diff_coeffs_public_base; @@ -108,6 +198,8 @@ struct EGSInternal { TPContext *tp_internal; + StepControl sc; + MPI_Comm comm; DomainGeometry *dg; unsigned int local_component; @@ -175,13 +267,14 @@ static void boundaries_sync(EGSContext *ctx, NDArray *a_dst) } } -static void residual_calc(EGSContext *ctx, int export_res) +static void residual_calc(EGSContext *ctx, double *residual_max, NDArray *dst_add, + NDArray *src, double step) { EGSInternal *priv = ctx->priv; const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; ptrdiff_t *offset = priv->residual_calc_offset[priv->steps_since_sync]; size_t *size = priv->residual_calc_size[priv->steps_since_sync]; - NDArray *dst = export_res ? ctx->residual : priv->u_next; + NDArray *dst = dst_add ? dst_add : ctx->residual; int reflect_flags = 0; mg2di_timer_start(&ctx->timer_res_calc); @@ -194,19 +287,19 @@ static void residual_calc(EGSContext *ctx, int export_res) priv->dg->components[priv->local_component].bnd_is_outer[bnd_idx]) reflect_flags |= 1 << bnd_idx; - mg2di_residual_calc(priv->rescalc, size, &ctx->residual_max, + step *= ctx->step[0] * ctx->step[1]; + mg2di_residual_calc(priv->rescalc, size, residual_max, NDPTR2D(dst, offset[0], offset[1]), dst->stride[0], - NDPTR2D(ctx->u, offset[0], offset[1]), ctx->u->stride[0], + NDPTR2D(src, offset[0], offset[1]), src->stride[0], NDPTR2D(ctx->rhs, offset[0], offset[1]), ctx->rhs->stride[0], diff_coeffs, priv->diff_coeffs[0]->stride[0], - export_res ? 0.0 : 1.0, export_res ? 1.0 : priv->r.relax_factor, - reflect_flags, FD_STENCIL_MAX); + dst_add ? 1.0 : 0.0, dst_add ? step : 1.0, reflect_flags, FD_STENCIL_MAX); mg2di_timer_stop(&ctx->timer_res_calc); if (priv->dg->nb_components > 1) { mg2di_timer_start(&ctx->timer_mpi_sync); - MPI_Allreduce(MPI_IN_PLACE, &ctx->residual_max, 1, + MPI_Allreduce(MPI_IN_PLACE, residual_max, 1, MPI_DOUBLE, MPI_MAX, priv->comm); mg2di_timer_stop(&ctx->timer_mpi_sync); } @@ -390,7 +483,8 @@ static int residual_add_task(void *arg, unsigned int job_idx, unsigned int threa EGSInternal *priv = ctx->priv; priv->r.line_add(ctx->domain_size[0], ctx->u->data + ctx->u->stride[0] * job_idx, - ctx->residual->data + ctx->residual->stride[0] * job_idx, priv->r.relax_factor); + ctx->residual->data + ctx->residual->stride[0] * job_idx, + priv->r.relax_factor * ctx->step[0] * ctx->step[1]); #else for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { ptrdiff_t idx = job_idx * ctx->u->stride[0] + idx0; @@ -406,31 +500,33 @@ static int solve_relax_step(EGSContext *ctx, int export_res) { EGSInternal *priv = ctx->priv; EGSRelaxContext *r = ctx->relax; - - int u_next_valid = priv->u_next_valid; - - if (u_next_valid) { - NDArray *tmp = ctx->u; - NDArray *tmp_ext = ctx->u_exterior; - NDArray *tmp_base = priv->u_base; - ctx->u = priv->u_next; - ctx->u_exterior = priv->u_next_exterior; - priv->u_base = priv->u_next_base; - priv->u_next = tmp; - priv->u_next_exterior = tmp_ext; - priv->u_next_base = tmp_base; - priv->u_next_valid = 0; - } + int ic = priv->iteration_cur; if (export_res) { - if (!u_next_valid) { + if (priv->u_iteration[(ic + 1) % HIST_SIZE] < ic + 1) { + if (priv->residual_iteration < ic) { + residual_calc(ctx, &priv->residual_max[ic % HIST_SIZE], NULL, priv->u_interior[ic % HIST_SIZE], 0); + priv->residual_iteration = ic; + } + mg2di_assert(priv->residual_iteration == ic); + + priv->step[ic % HIST_SIZE] = sc_step_get(&priv->sc); + if (priv->step[ic % HIST_SIZE] < 0) { + mg2di_log(&ctx->logger, MG2D_LOG_ERROR, + "Minimum step reached."); + return -EDOM; + } + priv->r.relax_factor = priv->step[0]; + + abort(); mg2di_timer_start(&r->timer_correct); tp_execute(ctx->tp, ctx->domain_size[1], residual_add_task, ctx); mg2di_timer_stop(&r->timer_correct); priv->reflect_skip = 0; - boundaries_apply(ctx, ctx->u, 0); + boundaries_apply(ctx, priv->u_interior[(ic + 1) % HIST_SIZE], 0); + priv->u_iteration[(ic + 1) % HIST_SIZE] = ic + 1; priv->steps_since_sync++; if (priv->steps_since_sync >= priv->steps_per_sync) { @@ -438,22 +534,81 @@ static int solve_relax_step(EGSContext *ctx, int export_res) priv->steps_since_sync = 0; } } + mg2di_assert(priv->u_iteration[(ic + 1) % HIST_SIZE] == ic + 1); - residual_calc(ctx, 1); + residual_calc(ctx, &priv->residual_max[(ic + 1) % HIST_SIZE], NULL, + priv->u_interior[(ic + 1) % HIST_SIZE], 0); boundaries_sync(ctx, ctx->residual); + priv->residual_iteration = ic + 1; } else { - mg2di_assert(u_next_valid); - residual_calc(ctx, 0); - boundaries_apply(ctx, priv->u_next, 0); + double res_old, res_new; + int diverged; + + mg2di_assert(priv->u_iteration[(ic + 1) % HIST_SIZE] == ic + 1); + res_old = priv->residual_max[ic % HIST_SIZE]; + + priv->step[(ic + 1) % HIST_SIZE] = sc_step_get(&priv->sc); + if (priv->step[(ic + 1) % HIST_SIZE] < 0) { + mg2di_log(&ctx->logger, MG2D_LOG_ERROR, + "Minimum step reached."); + return -EDOM; + } + + residual_calc(ctx, &priv->residual_max[(ic + 1) % HIST_SIZE], priv->u_interior[(ic + 2) % HIST_SIZE], + priv->u_interior[(ic + 1) % HIST_SIZE], priv->step[(ic + 1) % HIST_SIZE]); + + res_new = priv->residual_max[(ic + 1) % HIST_SIZE]; + diverged = res_new > 1e-6 && res_new / res_old - 1.0 >= 1e-2 ; + while (diverged) { + double tmp, step; + + fprintf(stderr, "%zu diverge step %g fact %g\n", ctx->domain_size[0], + priv->step[(ic + 1) % HIST_SIZE], res_new / res_old); + + sc_step_diverge(&priv->sc, priv->step[ic % HIST_SIZE]); + + priv->step[ic % HIST_SIZE] = sc_step_get(&priv->sc); + if (priv->step[(ic) % HIST_SIZE] < 0) { + mg2di_log(&ctx->logger, MG2D_LOG_ERROR, + "Minimum step reached."); + return -EDOM; + } + residual_calc(ctx, &tmp, priv->u_interior[(ic + 1) % HIST_SIZE], + priv->u_interior[ic % HIST_SIZE], priv->step[ic % HIST_SIZE]); + + priv->step[(ic + 1) % HIST_SIZE] = priv->step[ic % HIST_SIZE]; + residual_calc(ctx, &priv->residual_max[(ic + 1) % HIST_SIZE], priv->u_interior[(ic + 2) % HIST_SIZE], + priv->u_interior[(ic + 1) % HIST_SIZE], priv->step[(ic + 1) % HIST_SIZE]); + res_new = priv->residual_max[(ic + 1) % HIST_SIZE]; + diverged = res_new > 1e-6 && res_new >= res_old; + } + + sc_step_converge(&priv->sc, priv->step[ic % HIST_SIZE], res_old / res_new); + boundaries_apply(ctx, priv->u_interior[(ic + 2) % HIST_SIZE], 0); + priv->u_iteration[(ic + 2) % HIST_SIZE] = ic + 2; priv->steps_since_sync++; if (priv->steps_since_sync >= priv->steps_per_sync) { - boundaries_sync(ctx, priv->u_next); + boundaries_sync(ctx, priv->u_interior[(ic + 2) % HIST_SIZE]); priv->steps_since_sync = 0; } - priv->u_next_valid = 1; + + { + static int i; + if (!(i & 1023)) { + fprintf(stderr, "%zu converge %g diverge %g guess %g diverges %ld\n", ctx->domain_size[0], priv->sc.cur_converge, + priv->sc.prev_diverge, ctx->relax->relax_factor * ctx->relax->relax_multiplier, priv->sc.diverge_count); + } + i++; + } } + ic++; + ctx->u = priv->u_interior [ic % HIST_SIZE]; + ctx->u_exterior = priv->u_exterior [ic % HIST_SIZE]; + ctx->residual_max = priv->residual_max[ic % HIST_SIZE]; + priv->iteration_cur = ic; + return 0; } @@ -1048,7 +1203,7 @@ static int solve_exact(EGSContext *ctx) priv->reflect_skip = 0; boundaries_apply(ctx, ctx->u, 0); - residual_calc(ctx, 1); + residual_calc(ctx, &ctx->residual_max, NULL, ctx->u, 0); return 0; } @@ -1185,15 +1340,6 @@ int mg2di_egs_init(EGSContext *ctx, int flags) goto finish; } - if (r->relax_factor == 0.0) - priv->r.relax_factor = relax_factors[ctx->fd_stencil - 1]; - else - priv->r.relax_factor = r->relax_factor; - priv->r.relax_factor *= ctx->step[0] * ctx->step[0]; - - if (r->relax_multiplier > 0.0) - priv->r.relax_factor *= r->relax_multiplier; - priv->r.line_add = line_madd_c; #if HAVE_EXTERNAL_ASM if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) @@ -1228,6 +1374,11 @@ int mg2di_egs_init(EGSContext *ctx, int flags) arg.dc = MG2D_DIFF_COEFF_11; arg.fact = 1.0 / (fd_denoms[ctx->fd_stencil - 1][MG2D_DIFF_COEFF_11] * ctx->step[0] * ctx->step[1]); tp_execute(ctx->tp, ctx->domain_size[1], init_diff_coeffs_task, &arg); + + if (r->relax_factor > 0.0) { + double cfl_step = r->relax_factor * r->relax_multiplier; + sc_init(&priv->sc, cfl_step, cfl_step * 1e-3); + } } if (!(flags & EGS_INIT_FLAG_SAME_DIFF_COEFFS)) @@ -1303,6 +1454,16 @@ int mg2di_egs_init(EGSContext *ctx, int flags) if (ret < 0) goto finish; + SWAP(priv->u_exterior[0], priv->u_exterior[priv->iteration_cur % ARRAY_ELEMS(priv->u_exterior)], NDArray*); + SWAP(priv->u_interior[0], priv->u_interior[priv->iteration_cur % ARRAY_ELEMS(priv->u_interior)], NDArray*); + priv->iteration_cur = 0; + priv->u_iteration[0] = 0; + priv->residual_iteration = -1; + for (int i = 1; i < ARRAY_ELEMS(priv->u_iteration); i++) { + priv->u_iteration[i] = -1; + priv->residual_max[i] = -1; + } + finish: mg2di_timer_stop(&ctx->timer_init); @@ -1315,22 +1476,29 @@ finish: } boundaries_sync(ctx, ctx->rhs); - boundaries_apply(ctx, ctx->u, 1); - boundaries_sync(ctx, ctx->u); + for (int i = 0; i < ARRAY_ELEMS(priv->u_interior); i++) { + boundaries_apply(ctx, priv->u_interior[i], 1); + boundaries_sync(ctx, priv->u_interior[i]); + } priv->steps_since_sync = 0; - boundaries_apply(ctx, priv->u_next, 1); - boundaries_sync(ctx, priv->u_next); + priv->step[0] = sc_step_get(&priv->sc); + if (priv->step[0] < 0) { + mg2di_log(&ctx->logger, MG2D_LOG_ERROR, + "Minimum step reached. 0"); + return -EDOM; + } - residual_calc(ctx, 0); - boundaries_apply(ctx, priv->u_next, 0); + residual_calc(ctx, &priv->residual_max[0], priv->u_interior[1], priv->u_interior[0], priv->step[0]); + boundaries_apply(ctx, priv->u_interior[1], 0); + priv->u_iteration[1] = 1; + ctx->residual_max = priv->residual_max[0]; priv->steps_since_sync++; if (priv->steps_since_sync >= priv->steps_per_sync) { - boundaries_sync(ctx, priv->u_next); + boundaries_sync(ctx, priv->u_interior[1]); priv->steps_since_sync = 0; } - priv->u_next_valid = 1; } mg2di_timer_stop(&ctx->timer_solve); @@ -1347,7 +1515,7 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) Slice slice_exterior[2]; size_t size_alloc[2]; - size_t size_dc[2]; + size_t size_dc[2], size_u[2]; size_t extra_points[4]; int ret; @@ -1372,26 +1540,30 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) slice_interior[1].end = slice_interior[1].start + dc->interior.size[0]; slice_interior[1].step = 1; - ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_alloc, NDARRAY_ALLOC_ZERO); - if (ret < 0) - return ret; - ret = mg2di_ndarray_alloc(&priv->u_next_base, 2, size_alloc, NDARRAY_ALLOC_ZERO); - if (ret < 0) - return ret; + size_u[0] = size_alloc[0] * HIST_SIZE; + size_u[1] = size_alloc[1]; - ret = mg2di_ndarray_slice(&ctx->u, priv->u_base, slice_interior); - if (ret < 0) - return ret; - ret = mg2di_ndarray_slice(&priv->u_next, priv->u_next_base, slice_interior); + ret = mg2di_ndarray_alloc(&priv->u_base, 2, size_u, NDARRAY_ALLOC_ZERO); if (ret < 0) return ret; - ret = mg2di_ndarray_slice(&ctx->u_exterior, priv->u_base, slice_exterior); - if (ret < 0) - return ret; - ret = mg2di_ndarray_slice(&priv->u_next_exterior, priv->u_next_base, slice_exterior); - if (ret < 0) - return ret; + for (int i = 0; i < ARRAY_ELEMS(priv->u_exterior); i++) { + const Slice slice_u[2] = { SLICE(i * size_alloc[0], (i + 1) * size_alloc[0], 1), SLICE_NULL }; + NDArray *tmp; + + ret = mg2di_ndarray_slice(&tmp, priv->u_base, slice_u); + if (ret < 0) + return ret; + + ret = mg2di_ndarray_slice(&priv->u_exterior[i], tmp, slice_exterior); + if (ret >= 0) + ret = mg2di_ndarray_slice(&priv->u_interior[i], tmp, slice_interior); + mg2di_ndarray_free(&tmp); + if (ret < 0) + return ret; + } + ctx->u = priv->u_interior[0]; + ctx->u_exterior = priv->u_exterior[0]; ret = mg2di_ndarray_alloc(&priv->rhs_base, 2, size_alloc, 0); if (ret < 0) @@ -1522,6 +1694,8 @@ static EGSContext *egs_alloc(const DomainGeometry *dg, unsigned int local_compon if (!ctx->priv->rescalc) goto fail; + sc_reset(&ctx->priv->sc); + mg2di_timer_init(&ctx->timer_bnd); mg2di_timer_init(&ctx->timer_bnd_fixval); mg2di_timer_init(&ctx->timer_bnd_falloff); @@ -1601,12 +1775,11 @@ void mg2di_egs_free(EGSContext **pctx) exact_arrays_free(ctx); - mg2di_ndarray_free(&ctx->u); - mg2di_ndarray_free(&ctx->u_exterior); + for (int i = 0; i < ARRAY_ELEMS(ctx->priv->u_exterior); i++) { + mg2di_ndarray_free(&ctx->priv->u_exterior[i]); + mg2di_ndarray_free(&ctx->priv->u_interior[i]); + } mg2di_ndarray_free(&ctx->priv->u_base); - mg2di_ndarray_free(&ctx->priv->u_next); - mg2di_ndarray_free(&ctx->priv->u_next_exterior); - mg2di_ndarray_free(&ctx->priv->u_next_base); mg2di_ndarray_free(&ctx->rhs); mg2di_ndarray_free(&ctx->priv->rhs_base); -- cgit v1.2.3