aboutsummaryrefslogtreecommitdiff
path: root/residual_calc.c
diff options
context:
space:
mode:
Diffstat (limited to 'residual_calc.c')
-rw-r--r--residual_calc.c124
1 files changed, 111 insertions, 13 deletions
diff --git a/residual_calc.c b/residual_calc.c
index bfbb9bf..3b83c63 100644
--- a/residual_calc.c
+++ b/residual_calc.c
@@ -44,6 +44,11 @@ typedef struct ResidualCalcTask {
const double * const *diff_coeffs;
ptrdiff_t diff_coeffs_stride;
+
+ double u_mult;
+ double res_mult;
+
+ int mirror_line;
} ResidualCalcTask;
struct ResidualCalcInternal {
@@ -52,7 +57,12 @@ struct ResidualCalcInternal {
void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]);
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult);
+ void (*residual_add_line)(size_t linesize, double *dst, double *dst_max,
+ ptrdiff_t stride, const double *u, const double *rhs,
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult, double u_mult);
size_t calc_blocksize;
ResidualCalcTask task;
@@ -61,10 +71,20 @@ struct ResidualCalcInternal {
#if HAVE_EXTERNAL_ASM
void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]);
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult);
+void mg2di_residual_add_line_s1_fma3(size_t linesize, double *dst, double *dst_max,
+ ptrdiff_t stride, const double *u, const double *rhs,
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult, double u_mult);
void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]);
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult);
+void mg2di_residual_add_line_s2_fma3(size_t linesize, double *dst, double *dst_max,
+ ptrdiff_t stride, const double *u, const double *rhs,
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult, double u_mult);
#endif
static void
@@ -129,7 +149,8 @@ derivatives_calc_s2(double *dst, const double *u, ptrdiff_t stride)
static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB])
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult)
{
double res_max = 0.0, res_abs;
for (size_t i = 0; i < linesize; i++) {
@@ -141,7 +162,31 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
res += u_vals[j] * diff_coeffs[j][i];
- dst[i] = res;
+ dst[i] = res_mult * res;
+
+ res_abs = fabs(res);
+ res_max = MAX(res_max, res_abs);
+ }
+
+ *dst_max = MAX(*dst_max, res_max);
+}
+
+static void residual_add_line_s1_c(size_t linesize, double *dst, double *dst_max,
+ ptrdiff_t stride, const double *u, const double *rhs,
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult, double u_mult)
+{
+ double res_max = 0.0, res_abs;
+ for (size_t i = 0; i < linesize; i++) {
+ double u_vals[MG2D_DIFF_COEFF_NB];
+ double res;
+
+ derivatives_calc_s1(u_vals, u + i, stride);
+
+ res = -rhs[i];
+ for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
+ res += u_vals[j] * diff_coeffs[j][i];
+ dst[i] = u_mult * u[i] + res_mult * res;
res_abs = fabs(res);
res_max = MAX(res_max, res_abs);
@@ -152,7 +197,8 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma
static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB])
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult)
{
double res_max = 0.0, res_abs;
for (size_t i = 0; i < linesize; i++) {
@@ -164,7 +210,31 @@ static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_ma
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
res += u_vals[j] * diff_coeffs[j][i];
- dst[i] = res;
+ dst[i] = res_mult * res;
+
+ res_abs = fabs(res);
+ res_max = MAX(res_max, res_abs);
+ }
+
+ *dst_max = MAX(*dst_max, res_max);
+}
+
+static void residual_add_line_s2_c(size_t linesize, double *dst, double *dst_max,
+ ptrdiff_t stride, const double *u, const double *rhs,
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ double res_mult, double u_mult)
+{
+ double res_max = 0.0, res_abs;
+ for (size_t i = 0; i < linesize; i++) {
+ double u_vals[MG2D_DIFF_COEFF_NB];
+ double res;
+
+ derivatives_calc_s2(u_vals, u + i, stride);
+
+ res = -rhs[i];
+ for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
+ res += u_vals[j] * diff_coeffs[j][i];
+ dst[i] = u_mult * u[i] + res_mult * res;
res_abs = fabs(res);
res_max = MAX(res_max, res_abs);
@@ -179,15 +249,35 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre
ResidualCalcTask *task = &priv->task;
const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
+ double *dst = task->dst + job_idx * task->dst_stride;
for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride;
- priv->residual_calc_line(task->line_size, task->dst + job_idx * task->dst_stride,
- priv->residual_max + thread_idx * priv->calc_blocksize,
- task->u_stride, task->u + job_idx * task->u_stride,
- task->rhs + job_idx * task->rhs_stride,
- diff_coeffs);
+ if (task->u_mult == 0.0) {
+ priv->residual_calc_line(task->line_size, dst,
+ priv->residual_max + thread_idx * priv->calc_blocksize,
+ task->u_stride, task->u + job_idx * task->u_stride,
+ task->rhs + job_idx * task->rhs_stride,
+ diff_coeffs, task->res_mult);
+ } else {
+ priv->residual_add_line(task->line_size, dst,
+ priv->residual_max + thread_idx * priv->calc_blocksize,
+ task->u_stride, task->u + job_idx * task->u_stride,
+ task->rhs + job_idx * task->rhs_stride,
+ diff_coeffs, task->res_mult, task->u_mult);
+ }
+ if (task->mirror_line & (1 << 0)) {
+ for (int i = 1; i <= FD_STENCIL_MAX; i++)
+ dst[-i] = dst[i];
+ }
+ if (task->mirror_line & (1 << 1)) {
+ for (int i = 1; i <= FD_STENCIL_MAX; i++)
+ dst[task->line_size - 1 + i] = dst[task->line_size - 1 - i];
+ }
+ if ((task->mirror_line & (1 << 2)) && job_idx > 0 && job_idx <= FD_STENCIL_MAX) {
+ memcpy(task->dst - job_idx * task->dst_stride, dst, sizeof(*dst) * task->line_size);
+ }
return 0;
}
@@ -198,7 +288,8 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
const double *u, ptrdiff_t u_stride,
const double *rhs, ptrdiff_t rhs_stride,
const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- ptrdiff_t diff_coeffs_stride)
+ ptrdiff_t diff_coeffs_stride,
+ double u_mult, double res_mult, int mirror_line)
{
ResidualCalcInternal *priv = ctx->priv;
ResidualCalcTask *task = &priv->task;
@@ -215,6 +306,9 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
task->rhs_stride = rhs_stride;
task->diff_coeffs = diff_coeffs;
task->diff_coeffs_stride = diff_coeffs_stride;
+ task->u_mult = u_mult;
+ task->res_mult = res_mult;
+ task->mirror_line = mirror_line;
tp_execute(ctx->tp, size[1], residual_calc_task, priv);
@@ -234,18 +328,22 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx)
switch (ctx->fd_stencil) {
case 1:
priv->residual_calc_line = residual_calc_line_s1_c;
+ priv->residual_add_line = residual_add_line_s1_c;
#if HAVE_EXTERNAL_ASM
if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
priv->residual_calc_line = mg2di_residual_calc_line_s1_fma3;
+ priv->residual_add_line = mg2di_residual_add_line_s1_fma3;
priv->calc_blocksize = 4;
}
#endif
break;
case 2:
priv->residual_calc_line = residual_calc_line_s2_c;
+ priv->residual_add_line = residual_add_line_s2_c;
#if HAVE_EXTERNAL_ASM
if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
priv->residual_calc_line = mg2di_residual_calc_line_s2_fma3;
+ priv->residual_add_line = mg2di_residual_add_line_s2_fma3;
priv->calc_blocksize = 4;
}
#endif