/* * Multigrid solver for a 2nd order 2D linear PDE. * Copyright 2018 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include "common.h" #include "cpu.h" #include "ell_grid_solve.h" #include "log.h" #include "mg2d.h" #include "mg2d_boundary.h" #include "mg2d_boundary_internal.h" #include "mg2d_constants.h" #define LEVELS_MAX 16 typedef struct MG2DLevel { unsigned int depth; EGSContext *solver; double *prolong_tmp; ptrdiff_t prolong_tmp_stride; struct MG2DLevel *child; /* timings */ int64_t count_cycles; int64_t time_relax; int64_t time_prolong; int64_t time_restrict; int64_t time_correct; int64_t time_reinit; } MG2DLevel; struct MG2DInternal { MG2DLogger logger; TPContext *tp; MG2DLevel *root; int cpuflags; double fac00_max; int64_t time_solve; int64_t count_solve; }; static void log_callback(MG2DLogger *log, int level, const char *fmt, va_list vl) { MG2DContext *ctx = log->opaque; ctx->log_callback(ctx, level, fmt, vl); } static void log_relax_step(MG2DContext *ctx, MG2DLevel *level, const char *step_desc, double res_old, double res_new) { char prefix[32] = { 0 }; for (int i = 0; i < MIN(sizeof(prefix) - 1, level->depth); i++) prefix[i] = ' '; mg2di_log(&ctx->priv->logger, MG2D_LOG_DEBUG, "%s[%d]%s %g->%g (%g)\n", prefix, level->depth, step_desc, res_old, res_new, res_old / res_new); } static void mg_restrict_inject(EGSContext *coarse, double *dst, ptrdiff_t dst_stride, EGSContext *fine, const double *src, ptrdiff_t src_stride) { for (size_t j = 0; j < coarse->domain_size[1]; j++) for (size_t i = 0; i < coarse->domain_size[0]; i++) { const ptrdiff_t idx_dst = j * dst_stride + i; const size_t fine_i = i * 2; const size_t fine_j = j * 2; const ptrdiff_t idx_src = fine_j * src_stride + fine_i; dst[idx_dst] = src[idx_src]; } } static void mg_restrict_fw(EGSContext *coarse, double *dst, ptrdiff_t dst_stride, EGSContext *fine, const double *src, ptrdiff_t src_stride) { for (size_t j = 0; j < coarse->domain_size[1]; j++) for (size_t i = 0; i < coarse->domain_size[0]; i++) { const ptrdiff_t idx_dst = j * dst_stride + i; const size_t fine_i = i * 2; const size_t fine_j = j * 2; const ptrdiff_t idx_src = fine_j * src_stride + fine_i; dst[idx_dst] = 0.25 * src[idx_src] + 0.125 * (src[idx_src + 1] + src[idx_src - 1] + src[idx_src + src_stride] + src[idx_src - src_stride]) + 0.0625 * (src[idx_src + 1 + src_stride] + src[idx_src - 1 + src_stride] + src[idx_src + 1 - src_stride] + src[idx_src - 1 - src_stride]); } } static void mg_prolongate(EGSContext *fine, double *dst, ptrdiff_t dst_stride, EGSContext *coarse, const double *src, ptrdiff_t src_stride) { // for simplicity, this code writes one point over the domain size; // this allows us to avoid treating the boundary layers specially // dst is allocated with extra ghost zones for this purpose for (size_t j = 0; j < coarse->domain_size[1]; j++) for (size_t i = 0; i < coarse->domain_size[0]; i++) { const ptrdiff_t idx_src = j * src_stride + i; const ptrdiff_t idx_dst = 2 * (j * dst_stride + i); const double src00 = src[idx_src]; const double src10 = src[idx_src + 1]; const double src01 = src[idx_src + src_stride]; const double src11 = src[idx_src + src_stride + 1]; dst[idx_dst] = src00; dst[idx_dst + 1] = 0.5 * (src00 + src10); dst[idx_dst + dst_stride] = 0.5 * (src00 + src01); dst[idx_dst + dst_stride + 1] = 0.25 * (src00 + src10 + src01 + src11); } } typedef struct InterpParamsBilin { size_t dst_domain_size[2]; ptrdiff_t src_stride; ptrdiff_t dst_stride; const double *src; double *dst; double dx_dst; double dy_dst; double dx_src; double dy_src; double scalex; double scaley; } InterpParamsBilin; static int mg_interp_bilinear_kernel(void *arg, unsigned int job_idx, unsigned int thread_idx) { InterpParamsBilin *ip = arg; unsigned int idx1_dst = job_idx; double *dst = ip->dst; const double *src = ip->src; const double dx_dst = ip->dx_dst; const double dy_dst = ip->dy_dst; const double dx_src = ip->dx_src; const double dy_src = ip->dy_src; const double scalex = ip->scalex; const double scaley = ip->scaley; const ptrdiff_t src_stride = ip->src_stride; const ptrdiff_t dst_stride = ip->dst_stride; const double y = idx1_dst * dy_dst; const size_t idx1_src = idx1_dst * scaley; const double y0 = idx1_src * dy_src; const double y1 = y0 + dy_src; const double fact_y0 = (y1 - y) / dy_src; const double fact_y1 = (y - y0) / dy_src; for (size_t idx0_dst = 0; idx0_dst < ip->dst_domain_size[0]; idx0_dst++) { const double x = idx0_dst * dx_dst; const size_t idx0_src = idx0_dst * scalex; const size_t idx_src = idx1_src * src_stride + idx0_src; const double x0 = idx0_src * dx_src; const double x1 = x0 + dx_src; const double fact_x0 = (x1 - x) / dx_src; const double fact_x1 = (x - x0) / dx_src; const double val = fact_x0 * (fact_y0 * src[idx_src] + fact_y1 * src[idx_src + src_stride]) + fact_x1 * (fact_y0 * src[idx_src + 1] + fact_y1 * src[idx_src + 1 + src_stride]); dst[idx1_dst * dst_stride + idx0_dst] = val; } return 0; } static void mg_interp_bilinear(TPContext *tp, EGSContext *ctx_dst, double *dst, ptrdiff_t dst_stride, EGSContext *ctx_src, const double *src, ptrdiff_t src_stride) { InterpParamsBilin ip; ip.dst_domain_size[0] = ctx_dst->domain_size[0]; ip.dst_domain_size[1] = ctx_dst->domain_size[1]; ip.src_stride = src_stride; ip.dst_stride = dst_stride; ip.dx_dst = ctx_dst->step[0]; ip.dy_dst = ctx_dst->step[1]; ip.dx_src = ctx_src->step[0]; ip.dy_src = ctx_src->step[1]; ip.scalex = ip.dx_dst / ip.dx_src; ip.scaley = ip.dy_dst / ip.dy_src; ip.src = src; ip.dst = dst; tp_execute(tp, ctx_dst->domain_size[1], mg_interp_bilinear_kernel, &ip); } static int coarse_correct_task(void *arg, unsigned int job_idx, unsigned int thread_idx) { MG2DLevel *level = arg; for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) { const ptrdiff_t idx_dst = job_idx * level->solver->u_stride + idx0; const ptrdiff_t idx_src = job_idx * level->prolong_tmp_stride + idx0; level->solver->u[idx_dst] -= level->prolong_tmp[idx_src]; } return 0; } static void restrict_residual(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src) { EGSContext *s_src = src->solver; EGSContext *s_dst = dst->solver; if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) { mg_restrict_fw(s_dst, s_dst->rhs, s_dst->rhs_stride, s_src, s_src->residual, s_src->residual_stride); } else { mg_interp_bilinear(ctx->priv->tp, s_dst, s_dst->rhs, s_dst->rhs_stride, s_src, s_src->residual, s_src->residual_stride); } } static void prolong_solution(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src) { EGSContext *s_src = src->solver; EGSContext *s_dst = dst->solver; if (s_dst->domain_size[0] == 2 * (s_src->domain_size[0] - 1) + 1) { mg_prolongate(s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, s_src, s_src->u, s_src->u_stride); } else { mg_interp_bilinear(ctx->priv->tp, s_dst, dst->prolong_tmp, dst->prolong_tmp_stride, s_src, s_src->u, s_src->u_stride); } } static int mg_relax_step(MG2DContext *ctx, MG2DLevel *level, const char *step_desc) { double res_old; int64_t start; int ret; res_old = level->solver->residual_max; start = gettime(); ret = mg2di_egs_solve(level->solver); level->time_relax += gettime() - start; if (ret < 0) return ret; log_relax_step(ctx, level, step_desc, res_old, level->solver->residual_max); return 0; } static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level) { double res_old, res_new; int ret; /* on the refined levels, use zero as the initial guess for the * solution (correction for the upper level) */ if (level->depth > 0) { memset(level->solver->u, 0, sizeof(*level->solver->u) * level->solver->u_stride * level->solver->domain_size[1]); /* re-init the current-level solver (re-calc the residual) */ ret = mg2di_egs_init(level->solver); if (ret < 0) return ret; } res_old = level->solver->residual_max; /* handle coarsest grid */ if (!level->child) { ret = mg_relax_step(ctx, level, "coarse-step"); if (ret < 0) return ret; level->count_cycles++; goto finish; } for (int i = 0; i < ctx->nb_cycles; i++) { double res_prev; int64_t start; /* pre-restrict relaxation */ for (int j = 0; j < ctx->nb_relax_pre; j++) { ret = mg_relax_step(ctx, level, "pre-relax"); if (ret < 0) return ret; } /* restrict the residual as to the coarser-level rhs */ start = gettime(); restrict_residual(ctx, level->child, level); level->time_restrict += gettime() - start; /* solve on the coarser level */ ret = mg_solve_subgrid(ctx, level->child); if (ret < 0) return ret; /* prolongate the coarser-level correction */ start = gettime(); prolong_solution(ctx, level, level->child); level->time_prolong += gettime() - start; /* apply the correction */ start = gettime(); tp_execute(ctx->priv->tp, level->solver->domain_size[1], coarse_correct_task, level); level->time_correct += gettime() - start; /* re-init the current-level solver (re-calc the residual) */ res_prev = level->solver->residual_max; start = gettime(); ret = mg2di_egs_init(level->solver); if (ret < 0) return ret; level->time_reinit += gettime() - start; log_relax_step(ctx, level, "correct", res_prev, level->solver->residual_max); /* post-correct relaxation */ for (int j = 0; j < ctx->nb_relax_post; j++) { ret = mg_relax_step(ctx, level, "post-relax"); if (ret < 0) return ret; } level->count_cycles++; } finish: res_new = level->solver->residual_max; if (!isfinite(res_new) || (res_new > DBL_EPSILON && res_old / res_new <= 1e-1)) { mg2di_log(&ctx->priv->logger, MG2D_LOG_ERROR, "The relaxation step at level %d has diverged: %g -> %g\n", level->depth, res_old, res_new); return MG2D_ERR_DIVERGE; } return 0; } static void bnd_zero(MG2DBoundary *bdst, size_t nb_rows, size_t domain_size) { for (size_t i = 0; i < nb_rows; i++) { memset(bdst->val + i * bdst->val_stride - i, 0, sizeof(*bdst->val) * (domain_size + 2 * i)); } } static void bnd_copy(MG2DBoundary *bdst, const double *src, ptrdiff_t src_stride, size_t nb_rows, size_t domain_size) { for (size_t i = 0; i < nb_rows; i++) { memcpy(bdst->val + i * bdst->val_stride - i, src + i * src_stride - i, sizeof(*bdst->val) * (domain_size + 2 * i)); } } static void restrict_diff_coeffs(MG2DContext *ctx, MG2DLevel *dst, MG2DLevel *src) { EGSContext *s_src = src->solver; EGSContext *s_dst = dst->solver; if (s_src->domain_size[0] == 2 * (s_dst->domain_size[0] - 1) + 1) { for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) { mg_restrict_inject(s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride, s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride); } } else { for (int i = 0; i < ARRAY_ELEMS(s_src->diff_coeffs); i++) { mg_interp_bilinear(ctx->priv->tp, s_dst, s_dst->diff_coeffs[i], s_dst->diff_coeffs_stride, s_src, s_src->diff_coeffs[i], s_src->diff_coeffs_stride); } } } static double findmax(double *arr, size_t size[2], ptrdiff_t stride) { double ret = 0.0; for (size_t y = 0; y < size[1]; y++) for (size_t x = 0; x < size[0]; x++) { double val = fabs(arr[y * stride + x]); if (val > ret) ret = val; } return ret; } static int mg_levels_init(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; MG2DLevel *cur, *prev; priv->fac00_max = findmax(ctx->diff_coeffs[MG2D_DIFF_COEFF_00], priv->root->solver->domain_size, ctx->diff_coeffs_stride); cur = priv->root; prev = NULL; while (cur) { if (!prev) { cur->solver->step[0] = ctx->step[0]; cur->solver->step[1] = ctx->step[1]; } else { cur->solver->step[0] = prev->solver->step[0] * ((double)(prev->solver->domain_size[0] - 1) / (cur->solver->domain_size[0] - 1)); cur->solver->step[1] = prev->solver->step[1] * ((double)(prev->solver->domain_size[1] - 1) / (cur->solver->domain_size[1] - 1)); } /* Set the equation coefficients. */ if (prev) restrict_diff_coeffs(ctx, cur, prev); for (int i = 0; i < ARRAY_ELEMS(cur->solver->boundaries); i++) { MG2DBoundary *bsrc = ctx->boundaries[i]; MG2DBoundary *bdst = cur->solver->boundaries[i]; bdst->type = bsrc->type; switch (bsrc->type) { case MG2D_BC_TYPE_FIXVAL: if (!prev) bnd_copy(bdst, bsrc->val, bsrc->val_stride, ctx->fd_stencil, cur->solver->domain_size[0]); else bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]); break; case MG2D_BC_TYPE_FIXDIFF: bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]); break; default: return -ENOSYS; } } cur->solver->logger = priv->logger; cur->solver->cpuflags = priv->cpuflags; cur->solver->fd_stencil = ctx->fd_stencil; if (cur->solver->solver_type == EGS_SOLVER_RELAXATION) { EGSRelaxContext *r = cur->solver->solver_data; r->relax_factor = ctx->cfl_factor; r->relax_multiplier = 1.0 / (1.0 + cur->solver->step[0] * cur->solver->step[1] * priv->fac00_max / 8.0); } prev = cur; cur = cur->child; } return 0; } static int threadpool_init(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; MG2DLevel *level = priv->root; int ret; ret = tp_init(&priv->tp, ctx->nb_threads); if (ret < 0) return ret; while (level) { level->solver->tp = priv->tp; level = level->child; } return 0; } int mg2d_solve(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; MG2DLevel *root = priv->root; EGSContext *s_root = root->solver; int64_t time_start; double res_prev, res_cur; int ret; if (!priv->tp) { ret = threadpool_init(ctx); if (ret < 0) return ret; } time_start = gettime(); ret = mg_levels_init(ctx); if (ret < 0) return ret; ret = mg2di_egs_init(s_root); if (ret < 0) return ret; res_prev = s_root->residual_max; for (int i = 0; i < ctx->maxiter; i++) { ret = mg_solve_subgrid(ctx, root); if (ret < 0) goto fail; res_cur = s_root->residual_max; if (res_cur < ctx->tol) { mg2di_log(&priv->logger, MG2D_LOG_INFO, "converged on iteration %d, residual %g\n", i, res_cur); priv->time_solve += gettime() - time_start; priv->count_solve++; return 0; } mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "finished toplevel iteration %d, residual %g -> %g (%g)\n", i, res_prev, res_cur, res_prev / res_cur); if (res_cur / res_prev > 1.0) { mg2di_log(&priv->logger, MG2D_LOG_ERROR, "A multigrid iteration diverged\n"); ret = MG2D_ERR_DIVERGE; goto fail; } res_prev = res_cur; } ret = -EDOM; fail: mg2di_log(&priv->logger, MG2D_LOG_ERROR, "The solver failed to converge\n"); return ret; } static void mg_level_free(MG2DLevel **plevel) { MG2DLevel *level = *plevel; if (!level) return; free(level->prolong_tmp); mg2di_egs_free(&level->solver); free(level); *plevel = NULL; } static MG2DLevel *mg_level_alloc(enum EGSType type, const size_t domain_size) { MG2DLevel *level; int ret; level = calloc(1, sizeof(*level)); if (!level) return NULL; ret = posix_memalign((void**)&level->prolong_tmp, 32, sizeof(*level->prolong_tmp) * SQR(domain_size + 1)); if (ret != 0) goto fail; level->prolong_tmp_stride = domain_size + 1; level->solver = mg2di_egs_alloc(type, (size_t [2]){domain_size, domain_size}); if (!level->solver) goto fail; return level; fail: mg_level_free(&level); return NULL; } static int log2i(int n) { int ret = 0; while (n) { ret++; n >>= 1; } return ret - 1; } static int mg_levels_alloc(MG2DContext *ctx, size_t domain_size) { MG2DInternal *priv = ctx->priv; MG2DLevel *cur, **last; size_t last_size; unsigned int last_depth; priv->root = mg_level_alloc(EGS_SOLVER_RELAXATION, domain_size); if (!priv->root) return -ENOMEM; // choose the domain size for the first child // the children all have sizes 2**n + 1 // but we skip the closest lower one if it is too close if (domain_size <= 2) goto finish; domain_size = (1 << log2i(domain_size - 2)) + 1; if ((double)priv->root->solver->domain_size[0] / domain_size < 1.5) domain_size = (domain_size >> 1) + 1; cur = priv->root; for (int i = 0; i < LEVELS_MAX && domain_size > 4; i++) { cur->child = mg_level_alloc(EGS_SOLVER_RELAXATION, domain_size); if (!cur->child) return -ENOMEM; cur->child->depth = i + 1; cur = cur->child; domain_size = (domain_size >> 1) + 1; } finish: last = &priv->root; while ((*last)->child) last = &((*last)->child); last_size = (*last)->solver->domain_size[0]; last_depth = (*last)->depth; mg_level_free(last); *last = mg_level_alloc(EGS_SOLVER_EXACT, last_size); if (!*last) return -ENOMEM; (*last)->depth = last_depth; return 0; } static void log_default_callback(const MG2DContext *ctx, int level, const char *fmt, va_list vl) { if (level > ctx->log_level) return; vfprintf(stderr, fmt, vl); } MG2DContext *mg2d_solver_alloc(size_t domain_size) { MG2DContext *ctx; MG2DInternal *priv; int ret; if (domain_size < 3 || SIZE_MAX / domain_size < domain_size) return NULL; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->priv = calloc(1, sizeof(*ctx->priv)); if (!ctx->priv) goto fail; priv = ctx->priv; priv->logger.log = log_callback; priv->logger.opaque = ctx; priv->cpuflags = mg2di_cpu_flags_get(); for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { ctx->boundaries[i] = mg2di_bc_alloc(domain_size); if (!ctx->boundaries[i]) { ret = -ENOMEM; goto fail; } } ret = mg_levels_alloc(ctx, domain_size); if (ret < 0) goto fail; ctx->domain_size = domain_size; ctx->maxiter = 16; ctx->tol = 1e-12; ctx->nb_cycles = 1; ctx->nb_relax_pre = 1; ctx->nb_relax_post = 1; ctx->log_callback = log_default_callback; ctx->log_level = MG2D_LOG_INFO; ctx->nb_threads = 1; ctx->u = priv->root->solver->u; ctx->u_stride = priv->root->solver->u_stride; /* initialize the initial guess to zero */ memset(ctx->u, 0, sizeof(*ctx->u) * ctx->u_stride * ctx->domain_size); ctx->rhs = priv->root->solver->rhs; ctx->rhs_stride = priv->root->solver->rhs_stride; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) ctx->diff_coeffs[i] = priv->root->solver->diff_coeffs[i]; ctx->diff_coeffs_stride = priv->root->solver->diff_coeffs_stride; return ctx; fail: mg2d_solver_free(&ctx); return NULL; } void mg2d_solver_free(MG2DContext **pctx) { MG2DContext *ctx = *pctx; if (!ctx) return; while (ctx->priv->root) { MG2DLevel *next = ctx->priv->root->child; mg_level_free(&ctx->priv->root); ctx->priv->root = next; } for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) mg2di_bc_free(&ctx->boundaries[i]); tp_free(&ctx->priv->tp); free(ctx->priv); free(ctx); *pctx = NULL; } void mg2d_print_stats(MG2DContext *ctx, const char *prefix) { MG2DInternal *priv = ctx->priv; MG2DLevel *level = priv->root; int64_t other, levels_total = 0; if (!prefix) prefix = ""; mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "%s%ld solves; %g s total time; %g ms avg per call\n", prefix, priv->count_solve, priv->time_solve / 1e6, priv->time_solve / 1e3 / priv->count_solve); while (level) { EGSRelaxContext *r = NULL; int64_t level_total = level->time_relax + level->time_prolong + level->time_restrict + level->time_correct + level->time_reinit; int64_t relax_total = (r ? r->time_correct : 0) + level->solver->time_res_calc + level->solver->time_boundaries; if (level->solver->solver_type == EGS_SOLVER_RELAXATION) r = level->solver->solver_data; level_total = level->time_relax + level->time_prolong + level->time_restrict + level->time_correct + level->time_reinit; relax_total = (r ? r->time_correct : 0) + level->solver->time_res_calc + level->solver->time_boundaries; levels_total += level_total; mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "%s%2.2f%% level %d: %ld cycles %g s total time %g ms avg per call || " "%2.2f%% relax %2.2f%% prolong %2.2f%% restrict %2.2f%% correct %2.2f%% reinit || " "%2.2f%% residual %2.2f%% correct %2.2f%% boundaries ||" "\n", prefix, level_total * 100.0 / priv->time_solve, level->depth, level->count_cycles, level_total / 1e6, level_total / 1e3 / level->count_cycles, level->time_relax * 100.0 / level_total, level->time_prolong * 100.0 / level_total, level->time_restrict * 100.0 / level_total, level->time_correct * 100.0 / level_total, level->time_reinit * 100.0 / level_total, level->solver->time_res_calc * 100.0 / relax_total, (r ? r->time_correct : 0) * 100.0 / relax_total, level->solver->time_boundaries * 100.0 / relax_total ); level = level->child; } other = priv->time_solve - levels_total; mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "%s%2.2f%% other %g s total time %g ms avg per call\n", prefix, other * 100.0 / priv->time_solve, other / 1e6, other / 1e3 / priv->count_solve); } unsigned int mg2d_max_fd_stencil(void) { return FD_STENCIL_MAX; }