diff options
Diffstat (limited to 'residual_calc.c')
-rw-r--r-- | residual_calc.c | 51 |
1 files changed, 21 insertions, 30 deletions
diff --git a/residual_calc.c b/residual_calc.c index 8f67823..bfbb9bf 100644 --- a/residual_calc.c +++ b/residual_calc.c @@ -44,8 +44,6 @@ typedef struct ResidualCalcTask { const double * const *diff_coeffs; ptrdiff_t diff_coeffs_stride; - - const double *fd_factors; } ResidualCalcTask; struct ResidualCalcInternal { @@ -54,8 +52,7 @@ struct ResidualCalcInternal { void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors); + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]); size_t calc_blocksize; ResidualCalcTask task; @@ -64,29 +61,27 @@ struct ResidualCalcInternal { #if HAVE_EXTERNAL_ASM void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors); + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]); void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors); + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]); #endif static void -derivatives_calc_s1(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride) +derivatives_calc_s1(double *dst, const double *u, ptrdiff_t stride) { dst[MG2D_DIFF_COEFF_00] = u[0]; - dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]) * fd_factors[MG2D_DIFF_COEFF_10]; - dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]) * fd_factors[MG2D_DIFF_COEFF_01]; + dst[MG2D_DIFF_COEFF_10] = (u[1] - u[-1]); + dst[MG2D_DIFF_COEFF_01] = (u[stride] - u[-stride]); - dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]) * fd_factors[MG2D_DIFF_COEFF_20]; - dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]) * fd_factors[MG2D_DIFF_COEFF_02]; + dst[MG2D_DIFF_COEFF_20] = (u[1] - 2.0 * u[0] + u[-1]); + dst[MG2D_DIFF_COEFF_02] = (u[stride] - 2.0 * u[0] + u[-stride]); - dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]) * fd_factors[MG2D_DIFF_COEFF_11]; + dst[MG2D_DIFF_COEFF_11] = (u[1 + stride] - u[stride - 1] - u[-stride + 1] + u[-stride - 1]); } static void -derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrdiff_t stride) +derivatives_calc_s2(double *dst, const double *u, ptrdiff_t stride) { const double val = u[0]; @@ -120,29 +115,28 @@ derivatives_calc_s2(double *dst, const double *u, const double *fd_factors, ptrd const double valxm2ym2 = u[-2 - 2 * stride]; dst[MG2D_DIFF_COEFF_00] = val; - dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_10]; - dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_01]; + dst[MG2D_DIFF_COEFF_10] = (-1.0 * valxp2 + 8.0 * valxp1 - 8.0 * valxm1 + 1.0 * valxm2); + dst[MG2D_DIFF_COEFF_01] = (-1.0 * valyp2 + 8.0 * valyp1 - 8.0 * valym1 + 1.0 * valym2); - dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2) * fd_factors[MG2D_DIFF_COEFF_20]; - dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2) * fd_factors[MG2D_DIFF_COEFF_02]; + dst[MG2D_DIFF_COEFF_20] = (-1.0 * valxp2 + 16.0 * valxp1 - 30.0 * val + 16.0 * valxm1 - 1.0 * valxm2); + dst[MG2D_DIFF_COEFF_02] = (-1.0 * valyp2 + 16.0 * valyp1 - 30.0 * val + 16.0 * valym1 - 1.0 * valym2); dst[MG2D_DIFF_COEFF_11] = ( 1.0 * valxp2yp2 - 8.0 * valxp2yp1 + 8.0 * valxp2ym1 - 1.0 * valxp2ym2 -8.0 * valxp1yp2 + 64.0 * valxp1yp1 - 64.0 * valxp1ym1 + 8.0 * valxp1ym2 +8.0 * valxm1yp2 - 64.0 * valxm1yp1 + 64.0 * valxm1ym1 - 8.0 * valxm1ym2 - -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2) * fd_factors[MG2D_DIFF_COEFF_11]; + -1.0 * valxm2yp2 + 8.0 * valxm2yp1 - 8.0 * valxm2ym1 + 1.0 * valxm2ym2); } static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors) + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]) { double res_max = 0.0, res_abs; for (size_t i = 0; i < linesize; i++) { double u_vals[MG2D_DIFF_COEFF_NB]; double res; - derivatives_calc_s1(u_vals, u + i, fd_factors, stride); + derivatives_calc_s1(u_vals, u + i, stride); res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) @@ -158,15 +152,14 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - const double *fd_factors) + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB]) { double res_max = 0.0, res_abs; for (size_t i = 0; i < linesize; i++) { double u_vals[MG2D_DIFF_COEFF_NB]; double res; - derivatives_calc_s2(u_vals, u + i, fd_factors, stride); + derivatives_calc_s2(u_vals, u + i, stride); res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) @@ -194,7 +187,7 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, - diff_coeffs, task->fd_factors); + diff_coeffs); return 0; } @@ -205,8 +198,7 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], const double *u, ptrdiff_t u_stride, const double *rhs, ptrdiff_t rhs_stride, const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - ptrdiff_t diff_coeffs_stride, - const double *fd_factors) + ptrdiff_t diff_coeffs_stride) { ResidualCalcInternal *priv = ctx->priv; ResidualCalcTask *task = &priv->task; @@ -223,7 +215,6 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], task->rhs_stride = rhs_stride; task->diff_coeffs = diff_coeffs; task->diff_coeffs_stride = diff_coeffs_stride; - task->fd_factors = fd_factors; tp_execute(ctx->tp, size[1], residual_calc_task, priv); |