diff options
author | Anton Khirnov <anton@khirnov.net> | 2019-04-17 18:28:15 +0200 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2019-04-19 17:11:40 +0200 |
commit | b2d93a84e3a9c84e8c591c9f55355b5834c03d4b (patch) | |
tree | 189213565869426d3d161e334908e63168c0bd99 /residual_calc.c | |
parent | 5b94910fc4c6a47856290e9c23f9a905cf63c1eb (diff) |
egs: premultiply diff_coeffs with the denominator in init
Do not do it at every residual calc, which also allows us to get rid of
an extra parameter (and reduce the number of registers used in x86
SIMD).
Diffstat (limited to 'residual_calc.c')
-rw-r--r-- | residual_calc.c | 51 |
1 files changed, 21 insertions, 30 deletions
diff --git a/residual_calc.c b/residual_calc.c index 8f67823..bfbb9bf 100644 --- a/residual_calc.c +++ b/residual_calc.c @@ -44,8 +44,6 @@ typedef struct ResidualCalcTask { const double * const *diff_coeffs; ptrdiff_t diff_coeffs_stride; - - const double *fd_factors; } ResidualCalcTask; struct ResidualCalcInternal { @@ -54,8 +52,7 @@ struct ResidualCalcInternal { void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors); + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]); size_t calc_blocksize; ResidualCalcTask task; @@ -64,29 +61,27 @@ struct ResidualCalcInternal { #if HAVE_EXTERNAL_ASM void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors); + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]); void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors); + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]); #endif static void -derivatives_calc_s1(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride) +derivatives_calc_s1(double *dst, const double *u, ptrdiff_t stride) { dst[MG2D_DIFF_COEFF_00] = u[0]; - dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[MG2D_DIFF_COEFF_10]; - dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[MG2D_DIFF_COEFF_01]; + dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]); + dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]); - dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[MG2D_DIFF_COEFF_20]; - dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[MG2D_DIFF_COEFF_02]; + dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]); + dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]); - dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[MG2D_DIFF_COEFF_11]; + dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]); } static void -derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride) +derivatives_calc_s2(double *dst, const double *u, ptrdiff_t stride) { const double val = u[0]; @@ -120,29 +115,28 @@ derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrd const double valxm2ym2 = u[-2 - 2 * stride]; dst[MG2D_DIFF_COEFF_00] = val; - dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_10]; - dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_01]; + dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2); + dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2); - dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_20]; - dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_02]; + dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2); + dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2); dst[MG2D_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2 -8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2 +8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2 - -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[MG2D_DIFF_COEFF_11]; + -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2); } static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors) + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]) { double res_max = 0.0, res_abs; for (size_t i = 0; i < linesize; i++) { double u_vals[MG2D_DIFF_COEFF_NB]; double res; - derivatives_calc_s1(u_vals, u + i, fd_factors, stride); + derivatives_calc_s1(u_vals, u + i, stride); res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) @@ -158,15 +152,14 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors) + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]) { double res_max = 0.0, res_abs; for (size_t i = 0; i < linesize; i++) { double u_vals[MG2D_DIFF_COEFF_NB]; double res; - derivatives_calc_s2(u_vals, u + i, fd_factors, stride); + derivatives_calc_s2(u_vals, u + i, stride); res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) @@ -194,7 +187,7 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, - diff_coeffs, task->fd_factors); + diff_coeffs); return 0; } @@ -205,8 +198,7 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], const double *u, ptrdiff_t u_stride, const double *rhs, ptrdiff_t rhs_stride, const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - ptrdiff_t diff_coeffs_stride, - const double *fd_factors) + ptrdiff_t diff_coeffs_stride) { ResidualCalcInternal *priv = ctx->priv; ResidualCalcTask *task = &priv->task; @@ -223,7 +215,6 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], task->rhs_stride = rhs_stride; task->diff_coeffs = diff_coeffs; task->diff_coeffs_stride = diff_coeffs_stride; - task->fd_factors = fd_factors; tp_execute(ctx->tp, size[1], residual_calc_task, priv); |