aboutsummaryrefslogtreecommitdiff
path: root/residual_calc.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2019-04-17 18:28:15 +0200
committerAnton Khirnov <anton@khirnov.net>2019-04-19 17:11:40 +0200
commitb2d93a84e3a9c84e8c591c9f55355b5834c03d4b (patch)
tree189213565869426d3d161e334908e63168c0bd99 /residual_calc.c
parent5b94910fc4c6a47856290e9c23f9a905cf63c1eb (diff)
egs: premultiply diff_coeffs with the denominator in init
Do not do it at every residual calc, which also allows us to get rid of an extra parameter (and reduce the number of registers used in x86 SIMD).
Diffstat (limited to 'residual_calc.c')
-rw-r--r--residual_calc.c51
1 files changed, 21 insertions, 30 deletions
diff --git a/residual_calc.c b/residual_calc.c
index 8f67823..bfbb9bf 100644
--- a/residual_calc.c
+++ b/residual_calc.c
@@ -44,8 +44,6 @@ typedef struct ResidualCalcTask {
const double * const *diff_coeffs;
ptrdiff_t diff_coeffs_stride;
-
- const double *fd_factors;
} ResidualCalcTask;
struct ResidualCalcInternal {
@@ -54,8 +52,7 @@ struct ResidualCalcInternal {
void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors);
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]);
size_t calc_blocksize;
ResidualCalcTask task;
@@ -64,29 +61,27 @@ struct ResidualCalcInternal {
#if HAVE_EXTERNAL_ASM
void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors);
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]);
void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors);
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]);
#endif
static void
-derivatives_calc_s1(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride)
+derivatives_calc_s1(double *dst, const double *u, ptrdiff_t stride)
{
dst[MG2D_DIFF_COEFF_00] = u[0];
- dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[MG2D_DIFF_COEFF_10];
- dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[MG2D_DIFF_COEFF_01];
+ dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]);
+ dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]);
- dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[MG2D_DIFF_COEFF_20];
- dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[MG2D_DIFF_COEFF_02];
+ dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]);
+ dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]);
- dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[MG2D_DIFF_COEFF_11];
+ dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]);
}
static void
-derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride)
+derivatives_calc_s2(double *dst, const double *u, ptrdiff_t stride)
{
const double val = u[0];
@@ -120,29 +115,28 @@ derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrd
const double valxm2ym2 = u[-2 - 2 * stride];
dst[MG2D_DIFF_COEFF_00] = val;
- dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_10];
- dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_01];
+ dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2);
+ dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2);
- dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_20];
- dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_02];
+ dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2);
+ dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2);
dst[MG2D_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2
-8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2
+8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2
- -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[MG2D_DIFF_COEFF_11];
+ -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2);
}
static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors)
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB])
{
double res_max = 0.0, res_abs;
for (size_t i = 0; i < linesize; i++) {
double u_vals[MG2D_DIFF_COEFF_NB];
double res;
- derivatives_calc_s1(u_vals, u + i, fd_factors, stride);
+ derivatives_calc_s1(u_vals, u + i, stride);
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
@@ -158,15 +152,14 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma
static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- const double *fd_factors)
+ const double * const diff_coeffs[MG2D_DIFF_COEFF_NB])
{
double res_max = 0.0, res_abs;
for (size_t i = 0; i < linesize; i++) {
double u_vals[MG2D_DIFF_COEFF_NB];
double res;
- derivatives_calc_s2(u_vals, u + i, fd_factors, stride);
+ derivatives_calc_s2(u_vals, u + i, stride);
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
@@ -194,7 +187,7 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre
priv->residual_max + thread_idx * priv->calc_blocksize,
task->u_stride, task->u + job_idx * task->u_stride,
task->rhs + job_idx * task->rhs_stride,
- diff_coeffs, task->fd_factors);
+ diff_coeffs);
return 0;
}
@@ -205,8 +198,7 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
const double *u, ptrdiff_t u_stride,
const double *rhs, ptrdiff_t rhs_stride,
const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- ptrdiff_t diff_coeffs_stride,
- const double *fd_factors)
+ ptrdiff_t diff_coeffs_stride)
{
ResidualCalcInternal *priv = ctx->priv;
ResidualCalcTask *task = &priv->task;
@@ -223,7 +215,6 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
task->rhs_stride = rhs_stride;
task->diff_coeffs = diff_coeffs;
task->diff_coeffs_stride = diff_coeffs_stride;
- task->fd_factors = fd_factors;
tp_execute(ctx->tp, size[1], residual_calc_task, priv);