/* * Copyright 2018 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include "common.h" #include "ell_relax.h" #include "log.h" #define FD_STENCIL_MAX 1 struct EllRelaxInternal { ptrdiff_t stride; double *u_base; double *rhs_base; double *residual_base; double *diff_coeffs_base[ELL_RELAX_DIFF_COEFF_NB]; }; static const struct { unsigned int stride_idx; unsigned int is_upper; } boundary_def[] = { [ELL_RELAX_BOUNDARY_0L] = { .stride_idx = 0, .is_upper = 0 }, [ELL_RELAX_BOUNDARY_0U] = { .stride_idx = 0, .is_upper = 1, }, [ELL_RELAX_BOUNDARY_1L] = { .stride_idx = 1, .is_upper = 0, }, [ELL_RELAX_BOUNDARY_1U] = { .stride_idx = 1, .is_upper = 1, }, }; static inline void derivatives_calc(EllRelaxContext *ctx, double *dst, ptrdiff_t idx, ptrdiff_t stride) { const double *u = ctx->u; const double dx = ctx->step[0]; const double dy = ctx->step[1]; dst[ELL_RELAX_DIFF_COEFF_00] = u[idx]; dst[ELL_RELAX_DIFF_COEFF_10] = (u[idx + 1] - u[idx - 1]) / (2.0 * dx); dst[ELL_RELAX_DIFF_COEFF_01] = (u[idx + stride] - u[idx - stride]) / (2.0 * dy); dst[ELL_RELAX_DIFF_COEFF_20] = (u[idx + 1] - 2.0 * u[idx] + u[idx - 1]) / (SQR(dx)); dst[ELL_RELAX_DIFF_COEFF_02] = (u[idx + stride] - 2.0 * u[idx] + u[idx - stride]) / (SQR(dy)); dst[ELL_RELAX_DIFF_COEFF_11] = (u[idx + 1 + stride] - u[idx + stride - 1] - u[idx - stride + 1] + u[idx - stride - 1]) / (4.0 * dx * dy); } static void residual_calc(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { ptrdiff_t idx = idx1 * priv->stride + idx0; double u[ELL_RELAX_DIFF_COEFF_NB]; double res; derivatives_calc(ctx, u, idx, priv->stride); res = -ctx->rhs[idx]; for (int i = 0; i < ARRAY_ELEMS(u); i++) res += u[i] * ctx->diff_coeffs[i][idx]; ctx->residual[idx] = res; } for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const ptrdiff_t strides[2] = { 1, priv->stride }; const int si = boundary_def[i].stride_idx; const ptrdiff_t stride_boundary = strides[si]; const ptrdiff_t stride_offset = strides[!si]; const size_t size_boundary = ctx->domain_size[si]; const size_t size_offset = ctx->domain_size[!si]; double *res = ctx->residual + boundary_def[i].is_upper * ((size_offset - 1) * stride_offset); if (ctx->boundaries[i].type == ELL_RELAX_BC_TYPE_FIXVAL) { for (size_t j = 0; j < size_boundary; j++) { *res = 0.0; res += stride_boundary; } } } } static void boundaries_apply_fixval(double *dst, ptrdiff_t dst_stride, const double *src, size_t boundary_size, ptrdiff_t boundary_stride, int is_upper) { for (size_t i = 0; i < boundary_size; i++) { *dst = *src; dst += dst_stride; src++; } } static void boundaries_apply_fixdiff(double *dst, ptrdiff_t dst_stride, const double *src, size_t boundary_size, ptrdiff_t boundary_stride, int is_upper) { if (!is_upper) boundary_stride *= -1; for (size_t i = 0; i < boundary_size; i++) { for (int i = 1; i <= FD_STENCIL_MAX; i++) dst[boundary_stride * i] = dst[-boundary_stride * i]; dst += dst_stride; } } static void boundaries_apply(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; const ptrdiff_t strides[2] = { 1, priv->stride }; for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const int si = boundary_def[i].stride_idx; const ptrdiff_t stride_boundary = strides[si]; const ptrdiff_t stride_offset = strides[!si]; const size_t size_boundary = ctx->domain_size[si]; const size_t size_offset = ctx->domain_size[!si]; double *dst = ctx->u + boundary_def[i].is_upper * ((size_offset - 1) * stride_offset); switch (ctx->boundaries[i].type) { case ELL_RELAX_BC_TYPE_FIXVAL: boundaries_apply_fixval(dst, stride_boundary, ctx->boundaries[i].val, size_boundary, stride_offset, boundary_def[i].is_upper); break; case ELL_RELAX_BC_TYPE_FIXDIFF: boundaries_apply_fixdiff(dst, stride_boundary, ctx->boundaries[i].val, size_boundary, stride_offset, boundary_def[i].is_upper); break; } } } int mg2di_ell_relax_step(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; const double cfl_fac = (1.0 / 5.0) * ctx->step[0] * ctx->step[1]; for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { ptrdiff_t idx = idx1 * priv->stride + idx0; ctx->u[idx] += cfl_fac * ctx->residual[idx]; } boundaries_apply(ctx); residual_calc(ctx); return 0; } int mg2di_ell_relax_init(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; int ret; if (ctx->fd_stencil == 0 || ctx->fd_stencil > FD_STENCIL_MAX) { mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n", ctx->fd_stencil); return -EINVAL; } if (ctx->step[0] <= DBL_EPSILON || ctx->step[1] <= DBL_EPSILON) { mg2di_log(&ctx->logger, 0, "Spatial step size too small\n"); return -EINVAL; } for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { if (ctx->boundaries[i].type == ELL_RELAX_BC_TYPE_FIXDIFF) { for (int k = 0; k < ctx->domain_size[boundary_def[i].stride_idx]; k++) if (ctx->boundaries[i].val[k] != 0.0) { mg2di_log(&ctx->logger, 0, "Only zero boundary derivative supported\n"); return -ENOSYS; } } } boundaries_apply(ctx); residual_calc(ctx); return 0; } static int ell_relax_arrays_alloc(EllRelaxContext *ctx, const size_t domain_size[2]) { EllRelaxInternal *priv = ctx->priv; const size_t ghosts = FD_STENCIL_MAX; const size_t size_padded[2] = { domain_size[0] + 2 * ghosts, domain_size[1] + 2 * ghosts, }; const size_t stride = size_padded[0]; const size_t start_offset = ghosts * stride + ghosts; const size_t arr_size = size_padded[0] * size_padded[1]; int ret; ret = posix_memalign((void**)&priv->u_base, 32, sizeof(*priv->u_base) * arr_size); if (ret != 0) return -ret; ctx->u = priv->u_base + start_offset; ctx->u_stride = stride; ret = posix_memalign((void**)&priv->rhs_base, 32, sizeof(*priv->rhs_base) * arr_size); if (ret != 0) return -ret; ctx->rhs = priv->rhs_base + start_offset; ctx->rhs_stride = stride; ret = posix_memalign((void**)&priv->residual_base, 32, sizeof(*priv->residual_base) * arr_size); if (ret != 0) return -ret; memset(priv->residual_base, 0, sizeof(*priv->residual_base) * arr_size); ctx->residual = priv->residual_base + start_offset; ctx->residual_stride = stride; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { ret = posix_memalign((void**)&priv->diff_coeffs_base[i], 32, sizeof(*priv->diff_coeffs_base[i]) * arr_size); if (ret != 0) return -ret; ctx->diff_coeffs[i] = priv->diff_coeffs_base[i] + start_offset; } ctx->diff_coeffs_stride = stride; priv->stride = stride; for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const size_t boundary_size = domain_size[boundary_def[i].stride_idx]; ret = posix_memalign((void**)&ctx->boundaries[i].val, 32, sizeof(*ctx->boundaries[i].val) * boundary_size); if (ret != 0) return -ret; ctx->boundaries[i].val_len = boundary_size; } return 0; } EllRelaxContext *mg2di_ell_relax_alloc(size_t domain_size[2]) { EllRelaxContext *ctx; EllRelaxInternal *priv; int ret; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->priv = calloc(1, sizeof(*ctx->priv)); if (!ctx->priv) goto fail; priv = ctx->priv; if (!domain_size[0] || !domain_size[1] || domain_size[0] > SIZE_MAX / domain_size[1]) goto fail; ret = ell_relax_arrays_alloc(ctx, domain_size); if (ret < 0) goto fail; ctx->domain_size[0] = domain_size[0]; ctx->domain_size[1] = domain_size[1]; return ctx; fail: mg2di_ell_relax_free(&ctx); return NULL; } void mg2di_ell_relax_free(EllRelaxContext **pctx) { EllRelaxContext *ctx = *pctx; if (!ctx) return; free(ctx->priv->u_base); free(ctx->priv->rhs_base); free(ctx->priv->residual_base); for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) free(ctx->priv->diff_coeffs_base[i]); free(ctx->priv); for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) free(ctx->boundaries[i].val); free(ctx); *pctx = NULL; }