From b5de5a6977c1c0d0b1a76b084582d8b7d758a9ad Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 15 Apr 2024 14:02:19 +0200 Subject: residual_calc: typedef computation kernels --- residual_calc.c | 59 ++++++++++++++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 34 deletions(-) (limited to 'residual_calc.c') diff --git a/residual_calc.c b/residual_calc.c index a9efebe..a4fd265 100644 --- a/residual_calc.c +++ b/residual_calc.c @@ -31,6 +31,15 @@ #include "mg2d_constants.h" #include "residual_calc.h" +typedef void ResidualLineCalc(size_t linesize, double *dst, double *dst_max, + ptrdiff_t stride, const double *u, const double *rhs, + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + double res_mult); +typedef void ResidualLineAdd (size_t linesize, double *dst, double *dst_max, + ptrdiff_t stride, const double *u, const double *rhs, + const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + double res_mult, double u_mult); + typedef struct ResidualCalcTask { size_t size[2]; @@ -57,36 +66,18 @@ struct ResidualCalcInternal { double *residual_max; size_t residual_max_size; - void (*residual_calc_line)(size_t linesize, double *dst, double *dst_max, - ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - double res_mult); - void (*residual_add_line)(size_t linesize, double *dst, double *dst_max, - ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - double res_mult, double u_mult); + ResidualLineCalc *residual_line_calc; + ResidualLineAdd *residual_line_add; size_t calc_blocksize; ResidualCalcTask task; }; #if HAVE_NASM -void mg2di_residual_calc_line_s1_fma3(size_t linesize, double *dst, double *dst_max, - ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - double res_mult); -void mg2di_residual_add_line_s1_fma3(size_t linesize, double *dst, double *dst_max, - ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - double res_mult, double u_mult); -void mg2di_residual_calc_line_s2_fma3(size_t linesize, double *dst, double *dst_max, - ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - double res_mult); -void mg2di_residual_add_line_s2_fma3(size_t linesize, double *dst, double *dst_max, - ptrdiff_t stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - double res_mult, double u_mult); +ResidualLineCalc mg2di_residual_calc_line_s1_fma3; +ResidualLineCalc mg2di_residual_calc_line_s2_fma3; +ResidualLineAdd mg2di_residual_add_line_s1_fma3; +ResidualLineAdd mg2di_residual_add_line_s2_fma3; #endif static void @@ -257,13 +248,13 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride; if (task->u_mult == 0.0) { - priv->residual_calc_line(task->size[0], dst, + priv->residual_line_calc(task->size[0], dst, priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, diff_coeffs, task->res_mult); } else { - priv->residual_add_line(task->size[0], dst, + priv->residual_line_add(task->size[0], dst, priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, @@ -338,23 +329,23 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx) priv->calc_blocksize = 1; switch (ctx->fd_stencil) { case 1: - priv->residual_calc_line = residual_calc_line_s1_c; - priv->residual_add_line = residual_add_line_s1_c; + priv->residual_line_calc = residual_calc_line_s1_c; + priv->residual_line_add = residual_add_line_s1_c; #if HAVE_NASM if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) { - priv->residual_calc_line = mg2di_residual_calc_line_s1_fma3; - priv->residual_add_line = mg2di_residual_add_line_s1_fma3; + priv->residual_line_calc = mg2di_residual_calc_line_s1_fma3; + priv->residual_line_add = mg2di_residual_add_line_s1_fma3; priv->calc_blocksize = 4; } #endif break; case 2: - priv->residual_calc_line = residual_calc_line_s2_c; - priv->residual_add_line = residual_add_line_s2_c; + priv->residual_line_calc = residual_calc_line_s2_c; + priv->residual_line_add = residual_add_line_s2_c; #if HAVE_NASM if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) { - priv->residual_calc_line = mg2di_residual_calc_line_s2_fma3; - priv->residual_add_line = mg2di_residual_add_line_s2_fma3; + priv->residual_line_calc = mg2di_residual_calc_line_s2_fma3; + priv->residual_line_add = mg2di_residual_add_line_s2_fma3; priv->calc_blocksize = 4; } #endif -- cgit v1.2.3