/* * Copyright 2018 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include "common.h" #include "ell_relax.h" #include "log.h" static const double relax_factors[FD_STENCIL_MAX] = { [0] = 1.0 / 5, [1] = 1.0 / 7, }; struct EllRelaxInternal { ptrdiff_t stride; double *u_base; double *rhs_base; double *residual_base; double *diff_coeffs_base[ELL_RELAX_DIFF_COEFF_NB]; double *boundary_val_base[4]; void (*derivatives_calc)(EllRelaxContext *ctx, double *dst, ptrdiff_t idx, ptrdiff_t stride); double relax_factor; }; static const struct { unsigned int stride_idx; unsigned int is_upper; } boundary_def[] = { [ELL_RELAX_BOUNDARY_0L] = { .stride_idx = 0, .is_upper = 0 }, [ELL_RELAX_BOUNDARY_0U] = { .stride_idx = 0, .is_upper = 1, }, [ELL_RELAX_BOUNDARY_1L] = { .stride_idx = 1, .is_upper = 0, }, [ELL_RELAX_BOUNDARY_1U] = { .stride_idx = 1, .is_upper = 1, }, }; static void derivatives_calc_s1(EllRelaxContext *ctx, double *dst, ptrdiff_t idx, ptrdiff_t stride) { const double *u = ctx->u; const double dx = ctx->step[0]; const double dy = ctx->step[1]; dst[ELL_RELAX_DIFF_COEFF_00] = u[idx]; dst[ELL_RELAX_DIFF_COEFF_10] = (u[idx + 1] - u[idx - 1]) / (2.0 * dx); dst[ELL_RELAX_DIFF_COEFF_01] = (u[idx + stride] - u[idx - stride]) / (2.0 * dy); dst[ELL_RELAX_DIFF_COEFF_20] = (u[idx + 1] - 2.0 * u[idx] + u[idx - 1]) / (SQR(dx)); dst[ELL_RELAX_DIFF_COEFF_02] = (u[idx + stride] - 2.0 * u[idx] + u[idx - stride]) / (SQR(dy)); dst[ELL_RELAX_DIFF_COEFF_11] = (u[idx + 1 + stride] - u[idx + stride - 1] - u[idx - stride + 1] + u[idx - stride - 1]) / (4.0 * dx * dy); } static void derivatives_calc_s2(EllRelaxContext *ctx, double *dst, ptrdiff_t idx, ptrdiff_t stride) { const double *u = ctx->u; const double dx = ctx->step[0]; const double dy = ctx->step[1]; dst[ELL_RELAX_DIFF_COEFF_00] = u[idx]; dst[ELL_RELAX_DIFF_COEFF_10] = (-1.0 * u[idx + 2] + 8.0 * u[idx + 1] - 8.0 * u[idx - 1] + 1.0 * u[idx - 2]) / (12.0 * dx); dst[ELL_RELAX_DIFF_COEFF_01] = (-1.0 * u[idx + 2 * stride] + 8.0 * u[idx + stride] - 8.0 * u[idx - stride] + 1.0 * u[idx - 2 * stride]) / (12.0 * dy); dst[ELL_RELAX_DIFF_COEFF_20] = (-1.0 * u[idx + 2] + 16.0 * u[idx + 1] - 30.0 * u[idx] + 16.0 * u[idx - 1] - 1.0 * u[idx - 2]) / (12.0 * SQR(dx)); dst[ELL_RELAX_DIFF_COEFF_02] = (-1.0 * u[idx + 2 * stride] + 16.0 * u[idx + stride] - 30.0 * u[idx] + 16.0 * u[idx - stride] - 1.0 * u[idx - 2 * stride]) / (12.0 * SQR(dy)); dst[ELL_RELAX_DIFF_COEFF_11] = ( 1.0 * u[idx + 2 + 2 * stride] - 8.0 * u[idx + 2 + stride] + 8.0 * u[idx + 2 - stride] - 1.0 * u[idx + 2 - 2 * stride] -8.0 * u[idx + 1 + 2 * stride] + 64.0 * u[idx + 1 + stride] - 64.0 * u[idx + 1 - stride] + 8.0 * u[idx + 1 - 2 * stride] +8.0 * u[idx - 1 + 2 * stride] - 64.0 * u[idx - 1 + stride] + 64.0 * u[idx - 1 - stride] - 8.0 * u[idx - 1 - 2 * stride] -1.0 * u[idx - 2 + 2 * stride] + 8.0 * u[idx - 2 + stride] - 8.0 * u[idx - 2 - stride] + 1.0 * u[idx - 2 - 2 * stride]) / (144.0 * dx * dy); } static inline double residual_calc_kernel(EllRelaxContext *ctx, ptrdiff_t idx) { EllRelaxInternal *priv = ctx->priv; double u[ELL_RELAX_DIFF_COEFF_NB]; double res; priv->derivatives_calc(ctx, u, idx, priv->stride); res = -ctx->rhs[idx]; for (int i = 0; i < ARRAY_ELEMS(u); i++) res += u[i] * ctx->diff_coeffs[i][idx]; return res; } static void residual_calc(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; int64_t start; start = gettime(); for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { ptrdiff_t idx = idx1 * priv->stride + idx0; ctx->residual[idx] = residual_calc_kernel(ctx, idx); } for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const ptrdiff_t strides[2] = { 1, priv->stride }; const int si = boundary_def[i].stride_idx; const ptrdiff_t stride_boundary = strides[si]; const ptrdiff_t stride_offset = strides[!si]; const size_t size_boundary = ctx->domain_size[si]; const size_t size_offset = ctx->domain_size[!si]; double *res = ctx->residual + boundary_def[i].is_upper * ((size_offset - 1) * stride_offset); if (ctx->boundaries[i].type == ELL_RELAX_BC_TYPE_FIXVAL) { for (size_t j = 0; j < size_boundary; j++) { *res = 0.0; res += stride_boundary; } } } ctx->time_res_calc += gettime() - start; ctx->count_res++; } static void boundaries_apply_fixval(double *dst, const ptrdiff_t dst_stride[2], const double *src, ptrdiff_t src_stride, size_t boundary_size) { for (int j = 0; j < FD_STENCIL_MAX; j++) { for (ptrdiff_t i = -j; i < (ptrdiff_t)boundary_size + j; i++) dst[i * dst_stride[0]] = src[i]; dst += dst_stride[1]; src += src_stride; } } static void boundaries_apply_fixdiff(double *dst, const ptrdiff_t dst_stride[2], const double *src, ptrdiff_t src_stride, size_t boundary_size) { for (size_t i = 0; i < boundary_size; i++) { for (int j = 1; j <= FD_STENCIL_MAX; j++) dst[dst_stride[1] * j] = dst[-dst_stride[1] * j]; dst += dst_stride[0]; } } static void boundaries_apply(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; const ptrdiff_t strides[2] = { 1, priv->stride }; int64_t start; start = gettime(); for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const int si = boundary_def[i].stride_idx; const ptrdiff_t stride_boundary = strides[si]; const ptrdiff_t stride_offset = strides[!si]; const size_t size_boundary = ctx->domain_size[si]; const size_t size_offset = ctx->domain_size[!si]; double *dst = ctx->u + boundary_def[i].is_upper * ((size_offset - 1) * stride_offset); const ptrdiff_t dst_strides[] = { stride_boundary, (boundary_def[i].is_upper ? 1 : -1) * stride_offset }; switch (ctx->boundaries[i].type) { case ELL_RELAX_BC_TYPE_FIXVAL: boundaries_apply_fixval(dst, dst_strides, ctx->boundaries[i].val, ctx->boundaries[i].val_stride, size_boundary); break; case ELL_RELAX_BC_TYPE_FIXDIFF: boundaries_apply_fixdiff(dst, dst_strides, ctx->boundaries[i].val, ctx->boundaries[i].val_stride, size_boundary); break; } } /* fill in the corner ghosts */ { double *dst = ctx->u; for (int i = 1; i <= FD_STENCIL_MAX; i++) for (int j = 1; j <= FD_STENCIL_MAX; j++) { const ptrdiff_t idx = j * strides[1] + i; dst[-idx] = dst[idx]; } } { double *dst = ctx->u + ctx->domain_size[0] - 1; for (int i = 1; i <= FD_STENCIL_MAX; i++) for (int j = 1; j <= FD_STENCIL_MAX; j++) { const ptrdiff_t idx = j * strides[1] - i; dst[-idx] = dst[idx]; } } { double *dst = ctx->u + strides[1] * (ctx->domain_size[1] - 1); for (int i = 1; i <= FD_STENCIL_MAX; i++) for (int j = 1; j <= FD_STENCIL_MAX; j++) { const ptrdiff_t idx = -j * strides[1] + i; dst[-idx] = dst[idx]; } } { double *dst = ctx->u + strides[1] * (ctx->domain_size[1] - 1) + ctx->domain_size[0] - 1; for (int i = 1; i <= FD_STENCIL_MAX; i++) for (int j = 1; j <= FD_STENCIL_MAX; j++) { const ptrdiff_t idx = -j * strides[1] - i; dst[-idx] = dst[idx]; } } ctx->time_boundaries += gettime() - start; ctx->count_boundaries++; } int mg2di_ell_relax_step(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; const double cfl_fac = priv->relax_factor * ctx->step[0] * ctx->step[1]; int64_t start; start = gettime(); for (int idx1 = 0; idx1 < ctx->domain_size[1]; idx1++) for (int idx0 = 0; idx0 < ctx->domain_size[0]; idx0++) { ptrdiff_t idx = idx1 * priv->stride + idx0; ctx->u[idx] += cfl_fac * ctx->residual[idx]; } ctx->time_correct += gettime() - start; ctx->count_correct++; boundaries_apply(ctx); residual_calc(ctx); return 0; } int mg2di_ell_relax_init(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; int ret; switch (ctx->fd_stencil) { case 1: priv->derivatives_calc = derivatives_calc_s1; break; case 2: priv->derivatives_calc = derivatives_calc_s2; break; default: mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n", ctx->fd_stencil); return -EINVAL; } if (ctx->step[0] <= DBL_EPSILON || ctx->step[1] <= DBL_EPSILON) { mg2di_log(&ctx->logger, 0, "Spatial step size too small\n"); return -EINVAL; } if (ctx->relax_factor == 0.0) priv->relax_factor = relax_factors[ctx->fd_stencil - 1]; else priv->relax_factor = ctx->relax_factor; for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { if (ctx->boundaries[i].type == ELL_RELAX_BC_TYPE_FIXDIFF) { for (int k = 0; k < ctx->domain_size[boundary_def[i].stride_idx]; k++) if (ctx->boundaries[i].val[k] != 0.0) { mg2di_log(&ctx->logger, 0, "Only zero boundary derivative supported\n"); return -ENOSYS; } } } boundaries_apply(ctx); residual_calc(ctx); return 0; } static int ell_relax_arrays_alloc(EllRelaxContext *ctx, const size_t domain_size[2]) { EllRelaxInternal *priv = ctx->priv; const size_t ghosts = FD_STENCIL_MAX; const size_t size_padded[2] = { domain_size[0] + 2 * ghosts, domain_size[1] + 2 * ghosts, }; const size_t stride = size_padded[0]; const size_t start_offset = ghosts * stride + ghosts; const size_t arr_size = size_padded[0] * size_padded[1]; int ret; ret = posix_memalign((void**)&priv->u_base, 32, sizeof(*priv->u_base) * arr_size); if (ret != 0) return -ret; ctx->u = priv->u_base + start_offset; ctx->u_stride = stride; ret = posix_memalign((void**)&priv->rhs_base, 32, sizeof(*priv->rhs_base) * arr_size); if (ret != 0) return -ret; ctx->rhs = priv->rhs_base + start_offset; ctx->rhs_stride = stride; ret = posix_memalign((void**)&priv->residual_base, 32, sizeof(*priv->residual_base) * arr_size); if (ret != 0) return -ret; memset(priv->residual_base, 0, sizeof(*priv->residual_base) * arr_size); ctx->residual = priv->residual_base + start_offset; ctx->residual_stride = stride; for (int i = 0; i < ARRAY_ELEMS(ctx->diff_coeffs); i++) { ret = posix_memalign((void**)&priv->diff_coeffs_base[i], 32, sizeof(*priv->diff_coeffs_base[i]) * arr_size); if (ret != 0) return -ret; ctx->diff_coeffs[i] = priv->diff_coeffs_base[i] + start_offset; } ctx->diff_coeffs_stride = stride; priv->stride = stride; for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { const size_t boundary_len0 = domain_size[boundary_def[i].stride_idx]; const size_t boundary_len_max = boundary_len0 + 2 * (FD_STENCIL_MAX - 1); ret = posix_memalign((void**)&priv->boundary_val_base[i], 32, sizeof(*priv->boundary_val_base[i]) * boundary_len_max * FD_STENCIL_MAX); if (ret != 0) return -ret; ctx->boundaries[i].val = priv->boundary_val_base[i] + FD_STENCIL_MAX - 1; ctx->boundaries[i].val_stride = boundary_len_max; } return 0; } EllRelaxContext *mg2di_ell_relax_alloc(size_t domain_size[2]) { EllRelaxContext *ctx; EllRelaxInternal *priv; int ret; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->priv = calloc(1, sizeof(*ctx->priv)); if (!ctx->priv) goto fail; priv = ctx->priv; if (!domain_size[0] || !domain_size[1] || domain_size[0] > SIZE_MAX / domain_size[1]) goto fail; ret = ell_relax_arrays_alloc(ctx, domain_size); if (ret < 0) goto fail; ctx->domain_size[0] = domain_size[0]; ctx->domain_size[1] = domain_size[1]; return ctx; fail: mg2di_ell_relax_free(&ctx); return NULL; } void mg2di_ell_relax_free(EllRelaxContext **pctx) { EllRelaxContext *ctx = *pctx; if (!ctx) return; free(ctx->priv->u_base); free(ctx->priv->rhs_base); free(ctx->priv->residual_base); for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) free(ctx->priv->diff_coeffs_base[i]); for (int i = 0; i < ARRAY_ELEMS(ctx->priv->boundary_val_base); i++) free(ctx->priv->boundary_val_base[i]); free(ctx->priv); free(ctx); *pctx = NULL; }