diff options
Diffstat (limited to 'ell_relax.c')
-rw-r--r-- | ell_relax.c | 68 |
1 files changed, 34 insertions, 34 deletions
diff --git a/ell_relax.c b/ell_relax.c index d79921a..e56296e 100644 --- a/ell_relax.c +++ b/ell_relax.c @@ -25,7 +25,7 @@ #include "ell_relax.h" #include "log.h" -#define FD_STENCIL_MAX 2 +#define CFL_FACT (1.0 / 7.0) struct EllRelaxInternal { ptrdiff_t stride; @@ -34,6 +34,8 @@ struct EllRelaxInternal { double *residual_base; double *diff_coeffs_base[ELL_RELAX_DIFF_COEFF_NB]; + double *boundary_val_base[4]; + void (*derivatives_calc)(EllRelaxContext *ctx, double *dst, ptrdiff_t idx, ptrdiff_t stride); }; @@ -141,35 +143,27 @@ static void residual_calc(EllRelaxContext *ctx) ctx->count_res++; } -static void boundaries_apply_fixval(double *dst, ptrdiff_t dst_stride, - const double *src, - size_t boundary_size, ptrdiff_t boundary_stride, - int is_upper) +static void boundaries_apply_fixval(double *dst, const ptrdiff_t dst_stride[2], + const double *src, ptrdiff_t src_stride, + size_t boundary_size) { - if (!is_upper) - boundary_stride *= -1; - - for (size_t i = 0; i < boundary_size; i++) { - for (int j = 0; j <= FD_STENCIL_MAX; j++) - dst[j * boundary_stride] = *src; - dst += dst_stride; - src++; + for (int j = 0; j < FD_STENCIL_MAX; j++) { + for (ptrdiff_t i = -j; i < (ptrdiff_t)boundary_size + j; i++) + dst[i * dst_stride[0]] = src[i]; + dst += dst_stride[1]; + src += src_stride; } } -static void boundaries_apply_fixdiff(double *dst, ptrdiff_t dst_stride, - const double *src, - size_t boundary_size, ptrdiff_t boundary_stride, - int is_upper) +static void boundaries_apply_fixdiff(double *dst, const ptrdiff_t dst_stride[2], + const double *src, ptrdiff_t src_stride, + size_t boundary_size) { - if (!is_upper) - boundary_stride *= -1; - for (size_t i = 0; i < boundary_size; i++) { for (int j = 1; j <= FD_STENCIL_MAX; j++) - dst[boundary_stride * j] = dst[-boundary_stride * j]; + dst[dst_stride[1] * j] = dst[-dst_stride[1] * j]; - dst += dst_stride; + dst += dst_stride[0]; } } @@ -188,15 +182,17 @@ static void boundaries_apply(EllRelaxContext *ctx) const size_t size_offset = ctx->domain_size[!si]; double *dst = ctx->u + boundary_def[i].is_upper * ((size_offset - 1) * stride_offset); + const ptrdiff_t dst_strides[] = { stride_boundary, + (boundary_def[i].is_upper ? 1 : -1) * stride_offset }; switch (ctx->boundaries[i].type) { case ELL_RELAX_BC_TYPE_FIXVAL: - boundaries_apply_fixval(dst, stride_boundary, ctx->boundaries[i].val, size_boundary, - stride_offset, boundary_def[i].is_upper); + boundaries_apply_fixval(dst, dst_strides, ctx->boundaries[i].val, + ctx->boundaries[i].val_stride, size_boundary); break; case ELL_RELAX_BC_TYPE_FIXDIFF: - boundaries_apply_fixdiff(dst, stride_boundary, ctx->boundaries[i].val, size_boundary, - stride_offset, boundary_def[i].is_upper); + boundaries_apply_fixdiff(dst, dst_strides, ctx->boundaries[i].val, + ctx->boundaries[i].val_stride, size_boundary); break; } } @@ -241,7 +237,7 @@ static void boundaries_apply(EllRelaxContext *ctx) int mg2di_ell_relax_step(EllRelaxContext *ctx) { EllRelaxInternal *priv = ctx->priv; - const double cfl_fac = (1.0 / 5.0) * ctx->step[0] * ctx->step[1]; + const double cfl_fac = CFL_FACT * ctx->step[0] * ctx->step[1]; int64_t start; start = gettime(); @@ -344,12 +340,16 @@ static int ell_relax_arrays_alloc(EllRelaxContext *ctx, const size_t domain_size priv->stride = stride; for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) { - const size_t boundary_size = domain_size[boundary_def[i].stride_idx]; - ret = posix_memalign((void**)&ctx->boundaries[i].val, 32, - sizeof(*ctx->boundaries[i].val) * boundary_size); + const size_t boundary_len0 = domain_size[boundary_def[i].stride_idx]; + const size_t boundary_len_max = boundary_len0 + 2 * (FD_STENCIL_MAX - 1); + + ret = posix_memalign((void**)&priv->boundary_val_base[i], 32, + sizeof(*priv->boundary_val_base[i]) * boundary_len_max * FD_STENCIL_MAX); if (ret != 0) return -ret; - ctx->boundaries[i].val_len = boundary_size; + + ctx->boundaries[i].val = priv->boundary_val_base[i] + FD_STENCIL_MAX - 1; + ctx->boundaries[i].val_stride = boundary_len_max; } return 0; @@ -399,10 +399,10 @@ void mg2di_ell_relax_free(EllRelaxContext **pctx) free(ctx->priv->residual_base); for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++) free(ctx->priv->diff_coeffs_base[i]); - free(ctx->priv); + for (int i = 0; i < ARRAY_ELEMS(ctx->priv->boundary_val_base); i++) + free(ctx->priv->boundary_val_base[i]); - for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++) - free(ctx->boundaries[i].val); + free(ctx->priv); free(ctx); *pctx = NULL; |