/* * Multigrid solver for a 2nd order 2D linear PDE. * Copyright 2018 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include "common.h" #include "ell_relax.h" #include "log.h" #include "mg2d.h" #define LEVELS_MAX 16 typedef struct MG2DLevel { unsigned int depth; EllRelaxContext *solver; double *prolong_tmp; ptrdiff_t prolong_tmp_stride; struct MG2DLevel *child; /* timings */ int64_t count_cycles; int64_t time_relax; int64_t time_prolong; int64_t time_restrict; int64_t time_correct; int64_t time_reinit; } MG2DLevel; struct MG2DInternal { MG2DLogger logger; MG2DLevel *root; int64_t time_solve; int64_t count_solve; double *boundaries_base[4]; }; static double findmax(double *arr, size_t len) { double ret = 0.0; for (size_t i = 0; i < len; i++) { double val = fabs(*arr++); if (val > ret) ret = val; } return ret; } static int is_power2(int n) { return !(n & (n - 1)); } static void log_callback(MG2DLogger *log, int level, const char *fmt, va_list vl) { MG2DContext *ctx = log->opaque; ctx->log_callback(ctx, level, fmt, vl); } static void mg_restrict_inject(EllRelaxContext *coarse, double *dst, ptrdiff_t dst_stride, EllRelaxContext *fine, const double *src, ptrdiff_t src_stride) { for (size_t j = 0; j < coarse->domain_size[1]; j++) for (size_t i = 0; i < coarse->domain_size[0]; i++) { const ptrdiff_t idx_dst = j * dst_stride + i; const size_t fine_i = i * 2; const size_t fine_j = j * 2; const ptrdiff_t idx_src = fine_j * src_stride + fine_i; dst[idx_dst] = src[idx_src]; } } static void mg_restrict_fw(EllRelaxContext *coarse, double *dst, ptrdiff_t dst_stride, EllRelaxContext *fine, const double *src, ptrdiff_t src_stride) { for (size_t j = 0; j < coarse->domain_size[1]; j++) for (size_t i = 0; i < coarse->domain_size[0]; i++) { const ptrdiff_t idx_dst = j * dst_stride + i; const size_t fine_i = i * 2; const size_t fine_j = j * 2; const ptrdiff_t idx_src = fine_j * src_stride + fine_i; dst[idx_dst] = 0.25 * src[idx_src] + 0.125 * (src[idx_src + 1] + src[idx_src - 1] + src[idx_src + src_stride] + src[idx_src - src_stride]) + 0.0625 * (src[idx_src + 1 + src_stride] + src[idx_src - 1 + src_stride] + src[idx_src + 1 - src_stride] + src[idx_src - 1 - src_stride]); } } static void mg_prolongate(EllRelaxContext *fine, double *dst, ptrdiff_t dst_stride, EllRelaxContext *coarse, const double *src, ptrdiff_t src_stride) { for (size_t j = 0; j < coarse->domain_size[1]; j++) for (size_t i = 0; i < coarse->domain_size[0]; i++) { const ptrdiff_t idx_src = j * src_stride + i; const ptrdiff_t idx_dst = 2 * (j * dst_stride + i); dst[idx_dst] = src[idx_src]; } for (size_t j = 0; j < fine->domain_size[1]; j += 2) for (size_t i = 1; i < fine->domain_size[0]; i += 2) { const ptrdiff_t idx_dst = j * dst_stride + i; dst[idx_dst] = 0.5 * (dst[idx_dst - 1] + dst[idx_dst + 1]); } for (size_t j = 1; j < fine->domain_size[1]; j += 2) for (size_t i = 0; i < fine->domain_size[0]; i++) { const ptrdiff_t idx = j * dst_stride + i; dst[idx] = 0.5 * (dst[idx - dst_stride] + dst[idx + dst_stride]); } } static void mg_interp_bilinear(EllRelaxContext *ctx_dst, double *dst, ptrdiff_t dst_stride, EllRelaxContext *ctx_src, const double *src, ptrdiff_t src_stride) { const double dx_dst = ctx_dst->step[0]; const double dy_dst = ctx_dst->step[1]; const double dx_src = ctx_src->step[0]; const double dy_src = ctx_src->step[1]; const double scalex = dx_dst / dx_src; const double scaley = dy_dst / dy_src; for (size_t idx1_dst = 0; idx1_dst < ctx_dst->domain_size[1]; idx1_dst++) { const double y = idx1_dst * dy_dst; const size_t idx1_src = idx1_dst * scaley; const double y0 = idx1_src * dy_src; const double y1 = y0 + dy_src; const double fact_y0 = (y1 - y) / dy_src; const double fact_y1 = (y - y0) / dy_src; for (size_t idx0_dst = 0; idx0_dst < ctx_dst->domain_size[0]; idx0_dst++) { const double x = idx0_dst * dx_dst; const size_t idx0_src = idx0_dst * scalex; const size_t idx_src = idx1_src * src_stride + idx0_src; const double x0 = idx0_src * dx_src; const double x1 = x0 + dx_src; const double fact_x0 = (x1 - x) / dx_src; const double fact_x1 = (x - x0) / dx_src; const double val = fact_x0 * (fact_y0 * src[idx_src] + fact_y1 * src[idx_src + src_stride]) + fact_x1 * (fact_y0 * src[idx_src + 1] + fact_y1 * src[idx_src + 1 + src_stride]); dst[idx1_dst * dst_stride + idx0_dst] = val; } } } static int mg_solve_subgrid(MG2DContext *ctx, MG2DLevel *level) { int ret; /* on the refined levels, use zero as the initial guess for the * solution (correction for the upper level) */ if (level->depth > 0) { memset(level->solver->u, 0, sizeof(*level->solver->u) * level->solver->u_stride * level->solver->domain_size[1]); /* re-init the current-level solver (re-calc the residual) */ ret = mg2di_ell_relax_init(level->solver); if (ret < 0) return ret; } /* handle coarsest grid */ if (!level->child) { int64_t start = gettime(); for (int j = 0; j < 16; j++) { ret = mg2di_ell_relax_step(level->solver); if (ret < 0) return ret; } level->time_relax += gettime() - start; level->count_cycles++; goto finish; } for (int i = 0; i < ctx->nb_cycles; i++) { int64_t start; /* pre-restrict relaxation */ start = gettime(); for (int j = 0; j < ctx->nb_relax_pre; j++) { ret = mg2di_ell_relax_step(level->solver); if (ret < 0) return ret; } level->time_relax += gettime() - start; /* restrict the residual as to the coarser-level rhs */ start = gettime(); if (!is_power2(level->solver->domain_size[0] - 1)) { mg_interp_bilinear(level->child->solver, level->child->solver->rhs, level->child->solver->rhs_stride, level->solver, level->solver->residual, level->solver->residual_stride); } else { mg_restrict_fw(level->child->solver, level->child->solver->rhs, level->child->solver->rhs_stride, level->solver, level->solver->residual, level->solver->residual_stride); } level->time_restrict += gettime() - start; /* solve on the coarser level */ ret = mg_solve_subgrid(ctx, level->child); if (ret < 0) return ret; /* prolongate the coarser-level correction */ start = gettime(); if (!is_power2(level->solver->domain_size[0] - 1)) { mg_interp_bilinear(level->solver, level->prolong_tmp, level->prolong_tmp_stride, level->child->solver, level->child->solver->u, level->child->solver->u_stride); } else { mg_prolongate(level->solver, level->prolong_tmp, level->prolong_tmp_stride, level->child->solver, level->child->solver->u, level->child->solver->u_stride); } level->time_prolong += gettime() - start; /* apply the correction */ start = gettime(); for (size_t idx1 = 0; idx1 < level->solver->domain_size[1]; idx1++) for (size_t idx0 = 0; idx0 < level->solver->domain_size[0]; idx0++) { const ptrdiff_t idx_dst = idx1 * level->solver->u_stride + idx0; const ptrdiff_t idx_src = idx1 * level->prolong_tmp_stride + idx0; level->solver->u[idx_dst] -= level->prolong_tmp[idx_src]; } level->time_correct += gettime() - start; /* re-init the current-level solver (re-calc the residual) */ start = gettime(); ret = mg2di_ell_relax_init(level->solver); if (ret < 0) return ret; level->time_reinit += gettime() - start; /* post-correct relaxation */ start = gettime(); for (int j = 0; j < ctx->nb_relax_pre; j++) { ret = mg2di_ell_relax_step(level->solver); if (ret < 0) return ret; } level->time_relax += gettime() - start; level->count_cycles++; } finish: return 0; } static void bnd_zero(EllRelaxBoundary *bdst, size_t nb_rows, size_t domain_size) { for (size_t i = 0; i < nb_rows; i++) { memset(bdst->val + i * bdst->val_stride - i, 0, sizeof(*bdst->val) * (domain_size + 2 * i)); } } static void bnd_copy(EllRelaxBoundary *bdst, const double *src, ptrdiff_t src_stride, size_t nb_rows, size_t domain_size) { for (size_t i = 0; i < nb_rows; i++) { memcpy(bdst->val + i * bdst->val_stride - i, src + i * src_stride - i, sizeof(*bdst->val) * (domain_size + 2 * i)); } } static int mg_levels_init(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; MG2DLevel *cur, *prev; cur = priv->root; prev = NULL; while (cur) { /* Set the equation coefficients. */ if (prev) { for (int i = 0; i < ARRAY_ELEMS(prev->solver->diff_coeffs); i++) { if (!is_power2(prev->solver->domain_size[0] - 1)) { mg_interp_bilinear(cur->solver, cur->solver->diff_coeffs[i], cur->solver->diff_coeffs_stride, prev->solver, prev->solver->diff_coeffs[i], prev->solver->diff_coeffs_stride); } else { mg_restrict_inject(cur->solver, cur->solver->diff_coeffs[i], cur->solver->diff_coeffs_stride, prev->solver, prev->solver->diff_coeffs[i], prev->solver->diff_coeffs_stride); } } } for (int i = 0; i < ARRAY_ELEMS(cur->solver->boundaries); i++) { MG2DBoundary *bsrc = ctx->boundaries[i]; EllRelaxBoundary *bdst = &cur->solver->boundaries[i]; switch (bsrc->type) { case MG2D_BC_TYPE_FIXVAL: bdst->type = ELL_RELAX_BC_TYPE_FIXVAL; if (!prev) bnd_copy(bdst, bsrc->val, bsrc->val_stride, ctx->fd_stencil, cur->solver->domain_size[0]); else bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]); break; case MG2D_BC_TYPE_FIXDIFF: bdst->type = ELL_RELAX_BC_TYPE_FIXDIFF; bnd_zero(bdst, ctx->fd_stencil, cur->solver->domain_size[0]); break; default: return -ENOSYS; } } cur->solver->logger = priv->logger; if (!prev) { cur->solver->step[0] = ctx->step[0]; cur->solver->step[1] = ctx->step[1]; } else { cur->solver->step[0] = prev->solver->step[0] * ((double)(prev->solver->domain_size[0] - 1) / (cur->solver->domain_size[0] - 1)); cur->solver->step[1] = prev->solver->step[1] * ((double)(prev->solver->domain_size[1] - 1) / (cur->solver->domain_size[1] - 1)); } cur->solver->fd_stencil = ctx->fd_stencil; prev = cur; cur = cur->child; } return 0; } int mg2d_solve(MG2DContext *ctx) { MG2DInternal *priv = ctx->priv; int64_t time_start; double res_prev, res_cur; int ret; time_start = gettime(); ret = mg_levels_init(ctx); if (ret < 0) return ret; ret = mg2di_ell_relax_init(priv->root->solver); if (ret < 0) return ret; res_prev = findmax(priv->root->solver->residual, priv->root->solver->residual_stride * ctx->domain_size); for (int i = 0; i < ctx->maxiter; i++) { mg_solve_subgrid(ctx, priv->root); res_cur = findmax(priv->root->solver->residual, priv->root->solver->residual_stride * ctx->domain_size); if (res_cur < ctx->tol) { mg2di_log(&priv->logger, MG2D_LOG_INFO, "converged on iteration %d, residual %g\n", i, res_cur); priv->time_solve += gettime() - time_start; priv->count_solve++; return 0; } mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "finished toplevel iteration %d, residual %g -> %g (%g)\n", i, res_prev, res_cur, res_prev / res_cur); res_prev = res_cur; } mg2di_log(&priv->logger, MG2D_LOG_ERROR, "The solver failed to converge\n"); return -EDOM; } static void mg_level_free(MG2DLevel **plevel) { MG2DLevel *level = *plevel; if (!level) return; free(level->prolong_tmp); mg2di_ell_relax_free(&level->solver); free(level); *plevel = NULL; } static MG2DLevel *mg_level_alloc(const size_t domain_size) { MG2DLevel *level; int ret; level = calloc(1, sizeof(*level)); if (!level) return NULL; ret = posix_memalign((void**)&level->prolong_tmp, 32, sizeof(*level->prolong_tmp) * SQR(domain_size)); if (ret != 0) goto fail; level->prolong_tmp_stride = domain_size; level->solver = mg2di_ell_relax_alloc((size_t [2]){domain_size, domain_size}); if (!level->solver) goto fail; return level; fail: mg_level_free(&level); return NULL; } static int log2i(int n) { int ret = 0; while (n) { ret++; n >>= 1; } return ret - 1; } static int mg_levels_alloc(MG2DContext *ctx, size_t domain_size) { MG2DInternal *priv = ctx->priv; MG2DLevel *cur; priv->root = mg_level_alloc(domain_size); if (!priv->root) return -ENOMEM; cur = priv->root; domain_size = (1 << log2i(domain_size - 2)) + 1; for (int i = 0; i < LEVELS_MAX && domain_size > 4; i++) { cur->child = mg_level_alloc(domain_size); if (!cur->child) return -ENOMEM; cur->child->depth = i + 1; cur = cur->child; domain_size = (domain_size >> 1) + 1; } return 0; } static void log_default_callback(const MG2DContext *ctx, int level, const char *fmt, va_list vl) { vfprintf(stderr, fmt, vl); } MG2DContext *mg2d_solver_alloc(size_t domain_size) { MG2DContext *ctx; MG2DInternal *priv; int ret; if (domain_size < 3 || SIZE_MAX / domain_size < domain_size) return NULL; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->priv = calloc(1, sizeof(*ctx->priv)); if (!ctx->priv) goto fail; priv = ctx->priv; priv->logger.log = log_callback; priv->logger.opaque = ctx; for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const size_t boundary_len_max = domain_size + 2 * (FD_STENCIL_MAX - 1); ctx->boundaries[i] = calloc(1, sizeof(*ctx->boundaries[i])); if (!ctx->boundaries[i]) goto fail; ret = posix_memalign((void**)&priv->boundaries_base[i], 32, sizeof(*ctx->boundaries[i]->val) * boundary_len_max * FD_STENCIL_MAX); if (ret != 0) goto fail; ctx->boundaries[i]->val = priv->boundaries_base[i] + FD_STENCIL_MAX - 1; ctx->boundaries[i]->val_stride = boundary_len_max; } ret = mg_levels_alloc(ctx, domain_size); if (ret < 0) goto fail; ctx->domain_size = domain_size; ctx->maxiter = 16; ctx->tol = 1e-12; ctx->nb_cycles = 1; ctx->nb_relax_pre = 1; ctx->nb_relax_post = 1; ctx->log_callback = log_default_callback; ctx->u = priv->root->solver->u; ctx->u_stride = priv->root->solver->u_stride; /* initialize the initial guess to zero */ memset(ctx->u, 0, sizeof(*ctx->u) * ctx->u_stride * ctx->domain_size); ctx->rhs = priv->root->solver->rhs; ctx->rhs_stride = priv->root->solver->rhs_stride; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) ctx->diff_coeffs[i] = priv->root->solver->diff_coeffs[i]; ctx->diff_coeffs_stride = priv->root->solver->diff_coeffs_stride; return ctx; fail: mg2d_solver_free(&ctx); return NULL; } void mg2d_solver_free(MG2DContext **pctx) { MG2DContext *ctx = *pctx; if (!ctx) return; while (ctx->priv->root) { MG2DLevel *next = ctx->priv->root->child; mg_level_free(&ctx->priv->root); ctx->priv->root = next; } for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { free(ctx->priv->boundaries_base[i]); free(ctx->boundaries[i]); } free(ctx->priv); free(ctx); *pctx = NULL; } void mg2d_print_stats(MG2DContext *ctx, const char *prefix) { MG2DInternal *priv = ctx->priv; MG2DLevel *level = priv->root; if (!prefix) prefix = ""; mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "%s%ld solves; %g s total time; %g ms avg per call\n", prefix, priv->count_solve, priv->time_solve / 1e6, priv->time_solve / 1e3 / priv->count_solve); while (level) { int64_t level_total = level->time_relax + level->time_prolong + level->time_restrict + level->time_correct + level->time_reinit; int64_t relax_total = level->solver->time_correct + level->solver->time_res_calc + level->solver->time_boundaries; mg2di_log(&priv->logger, MG2D_LOG_VERBOSE, "%s%2.2f%% level %d: %ld cycles %g s total time %g ms avg per call || " "%2.2f%% relax %2.2f%% prolong %2.2f%% restrict %2.2f%% correct %2.2f%% reinit || " "%2.2f%% residual %2.2f%% correct %2.2f%% boundaries\n", prefix, level_total * 100.0 / priv->time_solve, level->depth, level->count_cycles, level_total / 1e6, level_total / 1e3 / level->count_cycles, level->time_relax * 100.0 / level_total, level->time_prolong * 100.0 / level_total, level->time_restrict * 100.0 / level_total, level->time_correct * 100.0 / level_total, level->time_reinit * 100.0 / level_total, level->solver->time_res_calc * 100.0 / relax_total, level->solver->time_correct * 100.0 / relax_total, level->solver->time_boundaries * 100.0 / relax_total ); level = level->child; } }