about | summary | refs | log | tree | commit | diff
path: root/ell_grid_solve.c
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2019-01-30 11:36:34 +0100
committer Anton Khirnov <anton@khirnov.net> 2019-01-30 11:36:34 +0100
commit b584bfe20168ac6208154b1eef395b3805b35e77 (patch)
tree 1882c7708e474adfc864bc3c1a1bd90a4b83d7fd /ell_grid_solve.c
parent 783d260e0d47d6adb4388fea9ed8e35122d4f6c2 (diff)
ell_grid_solve: split residual computation into its own file
Diffstat (limited to 'ell_grid_solve.c')
-rw-r--r-- ell_grid_solve.c 223
1 files changed, 31 insertions, 192 deletions
diff --git a/ell_grid_solve.c b/ell_grid_solve.c
index 20de6bc..1893214 100644
--- a/ell_grid_solve.c
+++ b/ell_grid_solve.c
@@ -28,12 +28,12 @@
#include <lapacke.h>
#include "common.h"
-#include "cpu.h"
#include "ell_grid_solve.h"
#include "log.h"
#include "mg2d_boundary.h"
#include "mg2d_boundary_internal.h"
#include "mg2d_constants.h"
+#include "residual_calc.h"
static const double relax_factors[FD_STENCIL_MAX] = {
[0] = 1.0 / 5,
@@ -63,18 +63,12 @@ struct EGSInternal {
double *residual_base;
double *diff_coeffs_base[MG2D_DIFF_COEFF_NB];
- double *residual_max;
- size_t residual_max_size;
-
- void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max,
- ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors);
- size_t calc_blocksize;
ptrdiff_t residual_calc_offset;
size_t residual_calc_size[2];
double fd_factors[MG2D_DIFF_COEFF_NB];
+ ResidualCalcContext *rescalc;
+
union {
EGSRelaxInternal r;
EGSExactInternal e;
@@ -124,158 +118,21 @@ static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = {
},
};
-#if HAVE_EXTERNAL_ASM
-void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max,
- ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors);
-void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max,
- ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors);
-#endif
-
-static void
-derivatives_calc_s1(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride)
-{
- dst[MG2D_DIFF_COEFF_00] = u[0];
- dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[MG2D_DIFF_COEFF_10];
- dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[MG2D_DIFF_COEFF_01];
-
- dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[MG2D_DIFF_COEFF_20];
- dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[MG2D_DIFF_COEFF_02];
-
- dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[MG2D_DIFF_COEFF_11];
-}
-
-static void
-derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride)
-{
- const double val = u[0];
-
- const double valxp1 = u[ 1];
- const double valxp2 = u[ 2];
- const double valxm1 = u[-1];
- const double valxm2 = u[-2];
- const double valyp1 = u[ 1 * stride];
- const double valyp2 = u[ 2 * stride];
- const double valym1 = u[-1 * stride];
- const double valym2 = u[-2 * stride];
-
- const double valxp1yp1 = u[ 1 + 1 * stride];
- const double valxp1yp2 = u[ 1 + 2 * stride];
- const double valxp1ym1 = u[ 1 - 1 * stride];
- const double valxp1ym2 = u[ 1 - 2 * stride];
-
- const double valxp2yp1 = u[ 2 + 1 * stride];
- const double valxp2yp2 = u[ 2 + 2 * stride];
- const double valxp2ym1 = u[ 2 - 1 * stride];
- const double valxp2ym2 = u[ 2 - 2 * stride];
-
- const double valxm1yp1 = u[-1 + 1 * stride];
- const double valxm1yp2 = u[-1 + 2 * stride];
- const double valxm1ym1 = u[-1 - 1 * stride];
- const double valxm1ym2 = u[-1 - 2 * stride];
-
- const double valxm2yp1 = u[-2 + 1 * stride];
- const double valxm2yp2 = u[-2 + 2 * stride];
- const double valxm2ym1 = u[-2 - 1 * stride];
- const double valxm2ym2 = u[-2 - 2 * stride];
-
- dst[MG2D_DIFF_COEFF_00] = val;
- dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_10];
- dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_01];
-
- dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_20];
- dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_02];
-
- dst[MG2D_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2
- -8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2
- +8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2
- -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[MG2D_DIFF_COEFF_11];
-}
-
-static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max,
- ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors)
-{
- double res_max = 0.0, res_abs;
- for (size_t i = 0; i < linesize; i++) {
- double u_vals[MG2D_DIFF_COEFF_NB];
- double res;
-
- derivatives_calc_s1(u_vals, u + i, fd_factors, stride);
-
- res = -rhs[i];
- for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
- res += u_vals[j] * diff_coeffs[j][i];
- dst[i] = res;
-
- res_abs = fabs(res);
- res_max = MAX(res_max, res_abs);
- }
-
- *dst_max = MAX(*dst_max, res_max);
-}
-
-static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max,
- ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors)
-{
- double res_max = 0.0, res_abs;
- for (size_t i = 0; i < linesize; i++) {
- double u_vals[MG2D_DIFF_COEFF_NB];
- double res;
-
- derivatives_calc_s2(u_vals, u + i, fd_factors, stride);
-
- res = -rhs[i];
- for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
- res += u_vals[j] * diff_coeffs[j][i];
- dst[i] = res;
-
- res_abs = fabs(res);
- res_max = MAX(res_max, res_abs);
- }
-
- *dst_max = MAX(*dst_max, res_max);
-}
-
-static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thread_idx)
-{
- EGSContext *ctx = arg;
- EGSInternal *priv = ctx->priv;
- const ptrdiff_t offset = priv->residual_calc_offset + job_idx * priv->stride;
- const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
-
- for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = ctx->diff_coeffs[i] + offset;
-
- priv->residual_calc_line(priv->residual_calc_size[0], ctx->residual + offset,
- priv->residual_max + thread_idx * priv->calc_blocksize,
- priv->stride, ctx->u + offset, ctx->rhs + offset,
- diff_coeffs, priv->fd_factors);
-
- return 0;
-}
-
static void residual_calc(EGSContext *ctx)
{
EGSInternal *priv = ctx->priv;
- double res_max = 0.0;
+ const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
int64_t start;
- memset(priv->residual_max, 0, sizeof(*priv->residual_max) * priv->residual_max_size);
-
start = gettime();
- tp_execute(ctx->tp, priv->residual_calc_size[1], residual_calc_task, ctx);
+ for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
+ diff_coeffs[i] = ctx->diff_coeffs[i] + priv->residual_calc_offset;
- for (size_t i = 0; i < priv->residual_max_size; i++)
- res_max = MAX(res_max, priv->residual_max[i]);
- ctx->residual_max = res_max;
+ mg2di_residual_calc(priv->rescalc, priv->residual_calc_size, priv->stride,
+ &ctx->residual_max, ctx->residual + priv->residual_calc_offset,
+ ctx->u + priv->residual_calc_offset, ctx->rhs + priv->residual_calc_offset,
+ diff_coeffs, priv->fd_factors);
ctx->time_res_calc += gettime() - start;
ctx->count_res++;
@@ -704,39 +561,17 @@ int mg2di_egs_solve(EGSContext *ctx)
int mg2di_egs_init(EGSContext *ctx)
{
EGSInternal *priv = ctx->priv;
- double *tmp;
int ret;
- priv->calc_blocksize = 1;
- switch (ctx->fd_stencil) {
- case 1:
- if (ctx->solver_type == EGS_SOLVER_EXACT)
- priv->e.fill_mat = fill_mat_s1;
-
- priv->residual_calc_line = residual_calc_line_s1_c;
-#if HAVE_EXTERNAL_ASM
- if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
- priv->residual_calc_line = mg2di_residual_calc_line_s1_fma3;
- priv->calc_blocksize = 4;
- }
-#endif
- break;
- case 2:
- if (ctx->solver_type == EGS_SOLVER_EXACT)
- priv->e.fill_mat = fill_mat_s2;
-
- priv->residual_calc_line = residual_calc_line_s2_c;
-#if HAVE_EXTERNAL_ASM
- if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
- priv->residual_calc_line = mg2di_residual_calc_line_s2_fma3;
- priv->calc_blocksize = 4;
+ if (ctx->solver_type == EGS_SOLVER_EXACT) {
+ switch (ctx->fd_stencil) {
+ case 1: priv->e.fill_mat = fill_mat_s1; break;
+ case 2: priv->e.fill_mat = fill_mat_s2; break;
+ default:
+ mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n",
+ ctx->fd_stencil);
+ return -EINVAL;
}
-#endif
- break;
- default:
- mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n",
- ctx->fd_stencil);
- return -EINVAL;
}
if (ctx->step[0] <= DBL_EPSILON || ctx->step[1] <= DBL_EPSILON) {
@@ -791,14 +626,13 @@ int mg2di_egs_init(EGSContext *ctx)
}
}
- priv->residual_max_size = tp_get_nb_threads(ctx->tp) * priv->calc_blocksize;
- tmp = realloc(priv->residual_max,
- sizeof(*priv->residual_max) * priv->residual_max_size);
- if (!tmp) {
- priv->residual_max_size = 0;
- return -ENOMEM;
- }
- priv->residual_max = tmp;
+ priv->rescalc->tp = ctx->tp;
+ priv->rescalc->fd_stencil = ctx->fd_stencil;
+ priv->rescalc->cpuflags = ctx->cpuflags;
+
+ ret = mg2di_residual_calc_init(priv->rescalc);
+ if (ret < 0)
+ return ret;
boundaries_apply(ctx);
residual_calc(ctx);
@@ -909,6 +743,10 @@ EGSContext *mg2di_egs_alloc(enum EGSType type, size_t domain_size[2])
ctx->domain_size[0] = domain_size[0];
ctx->domain_size[1] = domain_size[1];
+ ctx->priv->rescalc = mg2di_residual_calc_alloc();
+ if (!ctx->priv->rescalc)
+ goto fail;
+
return ctx;
fail:
mg2di_egs_free(&ctx);
@@ -922,6 +760,8 @@ void mg2di_egs_free(EGSContext **pctx)
if (!ctx)
return;
+ mg2di_residual_calc_free(&ctx->priv->rescalc);
+
free(ctx->solver_data);
if (ctx->solver_type == EGS_SOLVER_EXACT) {
@@ -936,7 +776,6 @@ void mg2di_egs_free(EGSContext **pctx)
free(ctx->priv->u_base);
free(ctx->priv->rhs_base);
free(ctx->priv->residual_base);
- free(ctx->priv->residual_max);
for (int i = 0; i < ARRAY_ELEMS(ctx->priv->diff_coeffs_base); i++)
free(ctx->priv->diff_coeffs_base[i]);
for (int i = 0; i < ARRAY_ELEMS(ctx->boundaries); i++)