/* * Multigrid solver for a 2nd order 2D linear PDE. * Copyright 2018 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include "common.h" #include "cpu.h" #include "ell_grid_solve.h" #include "log.h" #include "mg2d.h" #include "mg2d_boundary.h" #include "mg2d_boundary_internal.h" #include "mg2d_constants.h" #include "ndarray.h" #include "timer.h" #include "transfer.h" typedef struct MG2DLevel { unsigned int depth; EGSContext *solver; int egs_init_flags; GridTransferContext *transfer_restrict; GridTransferContext *transfer_prolong; NDArray *prolong_tmp_base; NDArray *prolong_tmp; struct MG2DLevel *child; /* timings */ int64_t count_cycles; Timer timer_solve; Timer timer_prolong; Timer timer_restrict; Timer timer_correct; Timer timer_reinit; } MG2DLevel; struct MG2DInternal { MG2DLogger logger; TPContext *tp; MG2DLevel *root; NDArray *u; NDArray *rhs; NDArray *diff_coeffs[MG2D_DIFF_COEFF_NB]; GridTransferContext *transfer_init; int cpuflags; Timer timer_solve; Timer timer_levels_init; }; static void log_callback(MG2DLogger *log, int level, const char *fmt, va_list vl) { MG2DContext *ctx = log->opaque; ctx->log_callback(ctx, level, fmt, vl); } static void log_relax_step(MG2DContext *ctx, MG2DLevel *level, const char *step_desc, double res_old, double res_new) { char prefix[32] = { 0 }; for (int i = 0; i < MIN(sizeof(prefix) - 1, level->depth); i++) prefix[i] = ' '; mg2di_log(&ctx->priv->logger, MG2D_LOG_DEBUG, "%s[%d]%s %g->%g (%g)\n", prefix, level->depth, step_desc, res_old, res_new, res_old / res_new); } static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { MG2DLevel *level = arg; for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) { const ptrdiff_t idx_dst = job_idx * level->solver->u->stride[0] + idx0; const ptrdiff_t idx_src = job_idx * level->prolong_tmp->stride[0] + idx0; level->solver->u->data[idx_dst] -= level->prolong_tmp->data[idx_src]; } return 0; } static int mg_relax_step(MG2DContext *ctx, MG2DLevel *level, const char *step_desc, int export_res) { double res_old; int ret; res_old = level->solver->residual_max; mg2di_timer_start(&level->timer_solve); ret = mg2di_egs_solve(level->solver, export_res); mg2di_timer_stop(&level->timer_solve); if (ret < 0) return ret; log_relax_step(ctx, level, step_desc, res_old, level->solver->residual_max); return 0; } static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level) { double res_old, res_new; int ret; /* on the refined levels, use zero as the initial guess for the * solution (correction for the upper level) */ if (level->depth > 0) { mg2di_timer_start(&level->timer_reinit); memset(level->solver->u->data, 0, sizeof(*level->solver->u->data) * level->solver->u->stride[0] * level->solver->domain_size[1]); /* re-init the current-level solver (re-calc the residual) */ ret = mg2di_egs_init(level->solver, level->egs_init_flags); if (ret < 0) return ret; mg2di_timer_stop(&level->timer_reinit); level->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS; } res_old = level->solver->residual_max; /* handle coarsest grid */ if (!level->child) { ret = mg_relax_step(ctx, level, "coarse-step", 1); if (ret < 0) return ret; level->count_cycles++; goto finish; } for (int i = 0; i < ctx->nb_cycles; i++) { double res_prev; /* pre-restrict relaxation */ for (int j = 0; j < ctx->nb_relax_pre; j++) { ret = mg_relax_step(ctx, level, "pre-relax", j == ctx->nb_relax_pre - 1); if (ret < 0) return ret; } /* restrict the residual as to the coarser-level rhs */ mg2di_timer_start(&level->timer_restrict); ret = mg2di_gt_transfer(level->transfer_restrict, level->child->solver->rhs, level->solver->residual); mg2di_timer_stop(&level->timer_restrict); if (ret < 0) return ret; /* solve on the coarser level */ ret = mg_solve_subgrid(ctx, level->child); if (ret < 0) return ret; /* prolongate the coarser-level correction */ mg2di_timer_start(&level->timer_prolong); ret = mg2di_gt_transfer(level->transfer_prolong, level->prolong_tmp, level->child->solver->u); mg2di_timer_stop(&level->timer_prolong); if (ret < 0) return ret; /* apply the correction */ mg2di_timer_start(&level->timer_correct); tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level); mg2di_timer_stop(&level->timer_correct); /* re-init the current-level solver (re-calc the residual) */ res_prev = level->solver->residual_max; mg2di_timer_start(&level->timer_reinit); ret = mg2di_egs_init(level->solver, level->egs_init_flags); mg2di_timer_stop(&level->timer_reinit); if (ret < 0) return ret; level->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS; log_relax_step(ctx, level, "correct", res_prev, level->solver->residual_max); /* post-correct relaxation */ for (int j = 0; j < ctx->nb_relax_post; j++) { ret = mg_relax_step(ctx, level, "post-relax", 0); if (ret < 0) return ret; } level->count_cycles++; } finish: res_new = level->solver->residual_max; if (!isfinite(res_new) || (res_new > 1e2 * DBL_EPSILON && res_old / res_new <= 1e-1)) { mg2di_log(&ctx->priv->logger, MG2D_LOG_ERROR, "The relaxation step at level %d has diverged: %g -> %g\n", level->depth, res_old, res_new); return MG2D_ERR_DIVERGE; } return 0; } static void bnd_zero(MG2DBoundary *bdst, size_t nb_rows, size_t domain_size) { for (size_t i = 0; i < nb_rows; i++) { memset(bdst->val + i * bdst->val_stride - i, 0, sizeof(*bdst->val) * (domain_size + 2 * i)); } } static void bnd_copy(MG2DBoundary *bdst, const double *src, ptrdiff_t src_stride, size_t nb_rows, size_t domain_size) { for (size_t i = 0; i < nb_rows; i++) { memcpy(bdst->val + i * bdst->val_stride - i, src + i * src_stride - i, sizeof(*bdst->val) * (domain_size + 2 * i)); } } static int restrict_diff_coeffs(MG2DContext *ctx, enum GridTransferOperator op, EGSContext *dst, EGSContext *src) { MG2DInternal *priv = ctx->priv; GridTransferContext *tc; int ret = 0; tc = mg2di_gt_alloc(op); if (!tc) return -ENOMEM; tc->tp = priv->tp; tc->cpuflags = priv->cpuflags; tc->src.size[0] = src->domain_size[0]; tc->src.size[1] = src->domain_size[1]; tc->src.step[0] = src->step[0]; tc->src.step[1] = src->step[1]; tc->dst.size[0] = dst->domain_size[0]; tc->dst.size[1] = dst->domain_size[1]; tc->dst.step[0] = dst->step[0]; tc->dst.step[1] = dst->step[1]; ret = mg2di_gt_init(tc); if (ret < 0) goto finish; for (int i = 0; i < MG2D_DIFF_COEFF_NB; i++) { ret = mg2di_gt_transfer(tc, dst->diff_coeffs[i], src->diff_coeffs[i]); if (ret < 0) goto finish; } finish: mg2di_gt_free(&tc); return ret; } static double findmax(double *arr, size_t size[2], ptrdiff_t stride) { double ret = 0.0; for (size_t y = 0; y < size[1]; y++) for (size_t x = 0; x < size[0]; x++) { double val = fabs(arr[y * stride + x]); if (val > ret) ret = val; } return ret; } static int mg_levels_init(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; MG2DLevel *cur, *prev; double diff0_max, diff2_max, tmp; int ret; diff0_max = findmax(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], priv->root->solver->domain_size, ctx->diff_coeffs_stride); diff2_max = findmax(ctx->diff_coeffs[MG2D_DIFF_COEFF_20], priv->root->solver->domain_size, ctx->diff_coeffs_stride); tmp = findmax(ctx->diff_coeffs[MG2D_DIFF_COEFF_02], priv->root->solver->domain_size, ctx->diff_coeffs_stride); diff2_max = MAX(diff2_max, tmp); cur = priv->root; prev = NULL; if (priv->u) { mg2di_ndarray_copy(cur->solver->u, priv->u); mg2di_ndarray_free(&priv->u); ctx->u = cur->solver->u->data; ctx->u_stride = cur->solver->u->stride[0]; mg2di_ndarray_copy(cur->solver->rhs, priv->rhs); mg2di_ndarray_free(&priv->rhs); ctx->rhs = cur->solver->rhs->data; ctx->rhs_stride = cur->solver->rhs->stride[0]; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { mg2di_ndarray_copy(cur->solver->diff_coeffs[i], priv->diff_coeffs[i]); mg2di_ndarray_free(&priv->diff_coeffs[i]); ctx->diff_coeffs[i] = cur->solver->diff_coeffs[i]->data; } ctx->diff_coeffs_stride = cur->solver->diff_coeffs[0]->stride[0]; } while (cur) { if (!prev) { cur->solver->step[0] = ctx->step[0]; cur->solver->step[1] = ctx->step[1]; } else { cur->solver->step[0] = prev->solver->step[0] * ((double)(prev->solver->domain_size[0] - 1) / (cur->solver->domain_size[0] - 1)); cur->solver->step[1] = prev->solver->step[1] * ((double)(prev->solver->domain_size[1] - 1) / (cur->solver->domain_size[1] - 1)); } for (int i = 0; i < ARRAY_ELEMS(cur->solver->boundaries); i++) { MG2DBoundary *bsrc = ctx->boundaries[i]; MG2DBoundary *bdst = cur->solver->boundaries[i]; bdst->type = bsrc->type; switch (bsrc->type) { case MG2D_BC_TYPE_FIXVAL: if (!prev) bnd_copy(bdst, bsrc->val, bsrc->val_stride, ctx->fd_stencil, cur->solver->domain_size[0]); else bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]); break; case MG2D_BC_TYPE_REFLECT: case MG2D_BC_TYPE_FALLOFF: bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]); break; default: return -ENOSYS; } } cur->solver->logger = priv->logger; cur->solver->cpuflags = priv->cpuflags; cur->solver->tp = priv->tp; cur->solver->fd_stencil = ctx->fd_stencil; if (cur->solver->solver_type == EGS_SOLVER_RELAXATION) { EGSRelaxContext *r = cur->solver->solver_data; r->relax_factor = ctx->cfl_factor; r->relax_multiplier = 1.0 / (diff2_max + cur->solver->step[0] * cur->solver->step[1] * diff0_max / 8.0); } prev = cur; cur = cur->child; } cur = priv->root; while (cur) { mg2di_gt_free(&cur->transfer_restrict); mg2di_gt_free(&cur->transfer_prolong); if (cur->child) { enum GridTransferOperator op_interp; enum GridTransferOperator op_restrict; switch (ctx->fd_stencil) { case 1: op_interp = GRID_TRANSFER_LAGRANGE_3; break; case 2: op_interp = GRID_TRANSFER_LAGRANGE_5; break; default: return -ENOSYS; } if (cur->solver->domain_size[0] == 2 * (cur->child->solver->domain_size[0] - 1) + 1) { if (ctx->fd_stencil == 1) op_restrict = GRID_TRANSFER_FW_3; else op_restrict = GRID_TRANSFER_FW_5; } else op_restrict = op_interp; cur->transfer_restrict = mg2di_gt_alloc(op_restrict); if (!cur->transfer_restrict) return -ENOMEM; cur->transfer_restrict->tp = priv->tp; cur->transfer_restrict->cpuflags = priv->cpuflags; cur->transfer_restrict->src.size[0] = cur->solver->domain_size[0]; cur->transfer_restrict->src.size[1] = cur->solver->domain_size[1]; cur->transfer_restrict->src.step[0] = cur->solver->step[0]; cur->transfer_restrict->src.step[1] = cur->solver->step[1]; cur->transfer_restrict->dst.size[0] = cur->child->solver->domain_size[0]; cur->transfer_restrict->dst.size[1] = cur->child->solver->domain_size[1]; cur->transfer_restrict->dst.step[0] = cur->child->solver->step[0]; cur->transfer_restrict->dst.step[1] = cur->child->solver->step[1]; ret = mg2di_gt_init(cur->transfer_restrict); if (ret < 0) return ret; cur->transfer_prolong = mg2di_gt_alloc(op_interp); if (!cur->transfer_prolong) return -ENOMEM; cur->transfer_prolong->tp = priv->tp; cur->transfer_prolong->cpuflags = priv->cpuflags; cur->transfer_prolong->dst.size[0] = cur->solver->domain_size[0]; cur->transfer_prolong->dst.size[1] = cur->solver->domain_size[1]; cur->transfer_prolong->dst.step[0] = cur->solver->step[0]; cur->transfer_prolong->dst.step[1] = cur->solver->step[1]; cur->transfer_prolong->src.size[0] = cur->child->solver->domain_size[0]; cur->transfer_prolong->src.size[1] = cur->child->solver->domain_size[1]; cur->transfer_prolong->src.step[0] = cur->child->solver->step[0]; cur->transfer_prolong->src.step[1] = cur->child->solver->step[1]; ret = mg2di_gt_init(cur->transfer_prolong); if (ret < 0) return ret; ret = restrict_diff_coeffs(ctx, op_interp, cur->child->solver, cur->solver); if (ret < 0) return ret; } cur->egs_init_flags &= ~EGS_INIT_FLAG_SAME_DIFF_COEFFS; cur = cur->child; } return 0; } static int threadpool_init(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; int ret; ret = tp_init(&priv->tp, ctx->nb_threads); if (ret < 0) return ret; return 0; } static int mg_levels_alloc(MG2DContext *ctx, size_t domain_size); int mg2d_solve(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; MG2DLevel *root; EGSContext *s_root; double res_orig, res_prev, res_cur; int ret; if (!priv->root) { ret = mg_levels_alloc(ctx, ctx->domain_size); if (ret < 0) return ret; } if (!priv->tp) { ret = threadpool_init(ctx); if (ret < 0) return ret; } root = priv->root; s_root = root->solver; mg2di_timer_start(&priv->timer_solve); mg2di_timer_start(&priv->timer_levels_init); ret = mg_levels_init(ctx); mg2di_timer_stop(&priv->timer_levels_init); if (ret < 0) goto finish; mg2di_timer_start(&root->timer_reinit); ret = mg2di_egs_init(s_root, 0); mg2di_timer_stop(&root->timer_reinit); if (ret < 0) goto finish; root->egs_init_flags |= EGS_INIT_FLAG_SAME_DIFF_COEFFS; res_orig = s_root->residual_max; res_prev = res_orig; for (int i = 0; i < ctx->maxiter; i++) { ret = mg_solve_subgrid(ctx, root); if (ret < 0) goto fail; res_cur = s_root->residual_max; if (res_cur < ctx->tol) { mg2di_log(&priv->logger, MG2D_LOG_INFO, "converged on iteration %d, residual %g\n", i, res_cur); //mg2di_ndarray_copy(priv->u, s_root->u); goto finish; } mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "finished toplevel iteration %d, residual %g -> %g (%g)\n", i, res_prev, res_cur, res_prev / res_cur); if (res_cur / res_prev > 1e1 || res_cur / res_orig > 1e3) { mg2di_log(&priv->logger, MG2D_LOG_ERROR, "A multigrid iteration diverged\n"); ret = MG2D_ERR_DIVERGE; goto fail; } res_prev = res_cur; } ret = -EDOM; mg2di_log(&priv->logger, MG2D_LOG_ERROR, "Maximum number of iterations (%d) reached\n", ctx->maxiter); fail: mg2di_log(&priv->logger, MG2D_LOG_ERROR, "The solver failed to converge\n"); finish: mg2di_timer_stop(&priv->timer_solve); return ret; } static void mg_level_free(MG2DLevel **plevel) { MG2DLevel *level = *plevel; if (!level) return; mg2di_ndarray_free(&level->prolong_tmp); mg2di_ndarray_free(&level->prolong_tmp_base); mg2di_egs_free(&level->solver); mg2di_gt_free(&level->transfer_restrict); mg2di_gt_free(&level->transfer_prolong); free(level); *plevel = NULL; } static MG2DLevel *mg_level_alloc(enum EGSType type, const size_t domain_size) { MG2DLevel *level; int ret; level = calloc(1, sizeof(*level)); if (!level) return NULL; ret = mg2di_ndarray_alloc(&level->prolong_tmp_base, 2, (size_t [2]){domain_size + 1, domain_size + 1}, 0); if (ret < 0) goto fail; ret = mg2di_ndarray_slice(&level->prolong_tmp, level->prolong_tmp_base, (Slice [2]){ SLICE(0, -1, 1), SLICE(0, -1, 1) }); if (ret < 0) goto fail; level->solver = mg2di_egs_alloc(type, (size_t [2]){domain_size, domain_size}); if (!level->solver) goto fail; mg2di_timer_init(&level->timer_solve); mg2di_timer_init(&level->timer_prolong); mg2di_timer_init(&level->timer_restrict); mg2di_timer_init(&level->timer_correct); mg2di_timer_init(&level->timer_reinit); return level; fail: mg_level_free(&level); return NULL; } static int log2i(int n) { int ret = 0; while (n) { ret++; n >>= 1; } return ret - 1; } static int mg_levels_alloc(MG2DContext *ctx, size_t domain_size) { MG2DInternal *priv = ctx->priv; MG2DLevel **dst; size_t next_size; dst = &priv->root; next_size = domain_size; for (int depth = 0; depth < ctx->max_levels; depth++) { enum EGSType cur_type; size_t cur_size = next_size; // choose the domain size for the next child // the children all have sizes 2**n + 1 // but on the top level we skip the closest lower one if it is too close if (depth == 0) { next_size = (1 << log2i(cur_size - 2)) + 1; if ((double)cur_size / next_size < 1.5) next_size = (next_size >> 1) + 1; } else next_size = (cur_size >> 1) + 1; cur_type = (cur_size <= ctx->max_exact_size) ? EGS_SOLVER_EXACT : EGS_SOLVER_RELAXATION; *dst = mg_level_alloc(cur_type, cur_size); if (!*dst) return -ENOMEM; (*dst)->depth = depth; if (cur_type == EGS_SOLVER_EXACT) break; if (next_size <= 4) break; dst = &(*dst)->child; } return 0; } static void log_default_callback(const MG2DContext *ctx, int level, const char *fmt, va_list vl) { if (level > ctx->log_level) return; vfprintf(stderr, fmt, vl); } MG2DContext *mg2d_solver_alloc(size_t domain_size) { MG2DContext *ctx; MG2DInternal *priv; int ret; if (domain_size < 3 || SIZE_MAX / domain_size < domain_size) return NULL; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->priv = calloc(1, sizeof(*ctx->priv)); if (!ctx->priv) goto fail; priv = ctx->priv; priv->logger.log = log_callback; priv->logger.opaque = ctx; priv->cpuflags = mg2di_cpu_flags_get(); for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { ctx->boundaries[i] = mg2di_bc_alloc(domain_size); if (!ctx->boundaries[i]) { goto fail; } } ret = mg2di_ndarray_alloc(&priv->u, 2, (size_t [2]){ domain_size, domain_size }, NDARRAY_ALLOC_ZERO); if (ret < 0) goto fail; ctx->u = priv->u->data; ctx->u_stride = priv->u->stride[0]; ret = mg2di_ndarray_alloc(&priv->rhs, 2, (size_t [2]){ domain_size, domain_size }, NDARRAY_ALLOC_ZERO); if (ret < 0) goto fail; ctx->rhs = priv->rhs->data; ctx->rhs_stride = priv->rhs->stride[0]; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { ret = mg2di_ndarray_alloc(&priv->diff_coeffs[i], 2, (size_t [2]){ domain_size, domain_size }, NDARRAY_ALLOC_ZERO); if (ret < 0) goto fail; ctx->diff_coeffs[i] = priv->diff_coeffs[i]->data; } ctx->diff_coeffs_stride = priv->diff_coeffs[0]->stride[0]; ctx->domain_size = domain_size; ctx->max_levels = 16; ctx->max_exact_size = 5; ctx->maxiter = 16; ctx->tol = 1e-12; ctx->nb_cycles = 1; ctx->nb_relax_pre = 1; ctx->nb_relax_post = 1; ctx->log_callback = log_default_callback; ctx->log_level = MG2D_LOG_INFO; ctx->nb_threads = 1; mg2di_timer_init(&priv->timer_levels_init); mg2di_timer_init(&priv->timer_solve); return ctx; fail: mg2d_solver_free(&ctx); return NULL; } void mg2d_solver_free(MG2DContext **pctx) { MG2DContext *ctx = *pctx; if (!ctx) return; while (ctx->priv->root) { MG2DLevel *next = ctx->priv->root->child; mg_level_free(&ctx->priv->root); ctx->priv->root = next; } for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) mg2di_bc_free(&ctx->boundaries[i]); tp_free(&ctx->priv->tp); mg2di_gt_free(&ctx->priv->transfer_init); mg2di_ndarray_free(&ctx->priv->u); mg2di_ndarray_free(&ctx->priv->rhs); for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs); i++) mg2di_ndarray_free(&ctx->priv->diff_coeffs[i]); free(ctx->priv); free(ctx); *pctx = NULL; } void mg2d_print_stats(MG2DContext *ctx, const char *prefix) { MG2DInternal *priv = ctx->priv; MG2DLevel *level = priv->root; int64_t other, levels_total = 0; if (!level) return; if (!prefix) prefix = ""; mg2di_log(&priv->logger, ctx->log_level, "%s%ld solves; %g s total time; %g ms avg per call; %g avg cycles per solve\n", prefix, priv->timer_solve.nb_runs, priv->timer_solve.time_nsec / 1e9, priv->timer_solve.time_nsec / 1e6 / priv->timer_solve.nb_runs, (double)level->count_cycles / priv->timer_solve.nb_runs); while (level) { char buf[1024], *p; int ret; EGSRelaxContext *r = NULL; EGSExactContext *e = NULL; int64_t level_total = level->timer_solve.time_nsec + level->timer_prolong.time_nsec + level->timer_restrict.time_nsec + level->timer_correct.time_nsec + level->timer_reinit.time_nsec; if (level->solver->solver_type == EGS_SOLVER_RELAXATION) r = level->solver->solver_data; else if (level->solver->solver_type == EGS_SOLVER_EXACT) e = level->solver->solver_data; levels_total += level_total; p = buf; ret = snprintf(p, sizeof(buf) - (p - buf), "%2.2f%% level %d: %ld cycles %g s total time %g ms avg per call", level_total * 100.0 / priv->timer_solve.time_nsec, level->depth, level->count_cycles, level_total / 1e9, level_total / 1e6 / level->count_cycles); if (ret > 0) p += ret; ret = snprintf(p, sizeof(buf) - (p - buf), "||%2.2f%% solve %2.2f%% reinit ", level->timer_solve.time_nsec * 100.0 / level_total, level->timer_reinit.time_nsec * 100.0 / level_total); if (ret > 0) p += ret; if (level->child) { ret = snprintf(p, sizeof(buf) - (p - buf), "%2.2f%% prolong %2.2f%% restrict %2.2f%% correct", level->timer_prolong.time_nsec * 100.0 / level_total, level->timer_restrict.time_nsec * 100.0 / level_total, level->timer_correct.time_nsec * 100.0 / level_total); if (ret > 0) p += ret; } ret = snprintf(p, sizeof(buf) - (p - buf), "||%2.2f%% init %2.2f%% residual %2.2f%% boundaries (%2.2f%% fixval %2.2f%% reflect %2.2f%% falloff %2.2f%% corners)", level->solver->timer_init.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_res_calc.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_bnd.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_bnd_fixval.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_bnd_reflect.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_bnd_falloff.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, level->solver->timer_bnd_corners.time_nsec * 100.0 / level->solver->timer_solve.time_nsec); if (ret > 0) p += ret; if (r) { ret = snprintf(p, sizeof(buf) - (p - buf), " %2.2f%% correct", r->timer_correct.time_nsec * 100.0 / level->solver->timer_solve.time_nsec); if (ret > 0) p += ret; } else if (e) { ret = snprintf(p, sizeof(buf) - (p - buf), " %2.2f%% const %2.2f%% bicgstab (%ld; %g it/slv) %2.2f%% lu (%ld) %2.2f%% export", e->timer_mat_construct.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, e->timer_bicgstab.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, e->timer_bicgstab.nb_runs, (double)e->bicgstab_iterations / e->timer_bicgstab.nb_runs, e->timer_lu_solve.time_nsec * 100.0 / level->solver->timer_solve.time_nsec, e->timer_lu_solve.nb_runs, e->timer_export.time_nsec * 100.0 / level->solver->timer_solve.time_nsec); if (ret > 0) p += ret; } mg2di_log(&priv->logger, ctx->log_level, "%s%s\n", prefix, buf); level = level->child; } mg2di_log(&priv->logger, ctx->log_level, "%s%2.2f%% levels init %g s total time %g ms avg per call\n", prefix, priv->timer_levels_init.time_nsec * 100.0 / priv->timer_solve.time_nsec, priv->timer_levels_init.time_nsec / 1e9, priv->timer_levels_init.time_nsec / 1e6 / priv->timer_levels_init.nb_runs); other = priv->timer_solve.time_nsec - levels_total - priv->timer_levels_init.time_nsec; mg2di_log(&priv->logger, ctx->log_level, "%s%2.2f%% other %g s total time %g ms avg per call\n", prefix, other * 100.0 / priv->timer_solve.time_nsec, other / 1e9, other / 1e6 / priv->timer_solve.nb_runs); } unsigned int mg2d_max_fd_stencil(void) { return FD_STENCIL_MAX; } int mg2d_init_guess(MG2DContext *ctx, const double *src, ptrdiff_t src_stride, const size_t src_size[2], const double src_step[2]) { MG2DInternal *priv = ctx->priv; NDArray *a_src; int ret; if (!priv->tp) { ret = threadpool_init(ctx); if (ret < 0) return ret; } if (priv->transfer_init && (priv->transfer_init->src.size[0] != src_size[0] || priv->transfer_init->src.size[1] != src_size[1] || fabs(priv->transfer_init->src.step[0] - src_step[0]) > 1e-15 || fabs(priv->transfer_init->src.step[1] - src_step[1]) > 1e-15)) { mg2di_gt_free(&priv->transfer_init); } if (!priv->transfer_init) { priv->transfer_init = mg2di_gt_alloc(GRID_TRANSFER_LAGRANGE_3); if (!priv->transfer_init) return -ENOMEM; priv->transfer_init->tp = priv->tp; priv->transfer_init->cpuflags = priv->cpuflags; priv->transfer_init->src.size[0] = src_size[0]; priv->transfer_init->src.size[1] = src_size[1]; priv->transfer_init->src.step[0] = src_step[0]; priv->transfer_init->src.step[1] = src_step[1]; priv->transfer_init->dst.size[0] = ctx->domain_size; priv->transfer_init->dst.size[1] = ctx->domain_size; priv->transfer_init->dst.step[0] = ctx->step[0]; priv->transfer_init->dst.step[1] = ctx->step[1]; ret = mg2di_gt_init(priv->transfer_init); if (ret < 0) return ret; } ret = mg2di_ndarray_wrap(&a_src, 2, src_size, src, (ptrdiff_t [2]){ src_stride, 1 }); if (ret < 0) return ret; ret = mg2di_gt_transfer(priv->transfer_init, priv->u ? priv->u : priv->root->solver->u, a_src); mg2di_ndarray_free(&a_src); return ret; }