diff options
Diffstat (limited to 'residual_calc.c')
-rw-r--r-- | residual_calc.c | 53 |
1 files changed, 26 insertions, 27 deletions
diff --git a/residual_calc.c b/residual_calc.c index c06c966..948655e 100644 --- a/residual_calc.c +++ b/residual_calc.c @@ -33,11 +33,11 @@ typedef void ResidualLineCalc(size_t linesize, double *dst, double *dst_max, ptrdiff_t u_stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + const double *diff_coeffs, ptrdiff_t diff_coeffs_offset, double res_mult); typedef void ResidualLineAdd (size_t linesize, double *dst, double *dst_max, ptrdiff_t u_stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + const double *diff_coeffs, ptrdiff_t diff_coeffs_offset, double res_mult, double u_mult); typedef struct ResidualCalcTask { @@ -52,8 +52,9 @@ typedef struct ResidualCalcTask { const double *rhs; ptrdiff_t rhs_stride; - const double * const *diff_coeffs; + const double *diff_coeffs; ptrdiff_t diff_coeffs_stride; + ptrdiff_t diff_coeffs_offset; double u_mult; double res_mult; @@ -74,10 +75,10 @@ struct ResidualCalcInternal { }; #if HAVE_NASM -ResidualLineCalc mg2di_residual_calc_line_s1_fma3; -ResidualLineCalc mg2di_residual_calc_line_s2_fma3; -ResidualLineAdd mg2di_residual_add_line_s1_fma3; -ResidualLineAdd mg2di_residual_add_line_s2_fma3; +ResidualLineCalc mg2di_residual_line_calc_s1_fma3; +ResidualLineCalc mg2di_residual_line_calc_s2_fma3; +ResidualLineAdd mg2di_residual_line_add_s1_fma3; +ResidualLineAdd mg2di_residual_line_add_s2_fma3; #endif static void @@ -142,7 +143,7 @@ derivatives_calc_s2(double *dst, const double *u, ptrdiff_t stride) static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t u_stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + const double *diff_coeffs, ptrdiff_t diff_coeffs_offset, double res_mult) { double res_max = 0.0, res_abs; @@ -154,7 +155,7 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) - res += u_vals[j] * diff_coeffs[j][i]; + res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i]; dst[i] = res_mult * res; res_abs = fabs(res); @@ -166,7 +167,7 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma static void residual_add_line_s1_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t u_stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + const double *diff_coeffs, ptrdiff_t diff_coeffs_offset, double res_mult, double u_mult) { double res_max = 0.0, res_abs; @@ -178,7 +179,7 @@ static void residual_add_line_s1_c(size_t linesize, double *dst, double *dst_max res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) - res += u_vals[j] * diff_coeffs[j][i]; + res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i]; dst[i] = u_mult * u[i] + res_mult * res; res_abs = fabs(res); @@ -190,7 +191,7 @@ static void residual_add_line_s1_c(size_t linesize, double *dst, double *dst_max static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t u_stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + const double *diff_coeffs, ptrdiff_t diff_coeffs_offset, double res_mult) { double res_max = 0.0, res_abs; @@ -202,7 +203,7 @@ static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_ma res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) - res += u_vals[j] * diff_coeffs[j][i]; + res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i]; dst[i] = res_mult * res; res_abs = fabs(res); @@ -214,7 +215,7 @@ static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_ma static void residual_add_line_s2_c(size_t linesize, double *dst, double *dst_max, ptrdiff_t u_stride, const double *u, const double *rhs, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], + const double *diff_coeffs, ptrdiff_t diff_coeffs_offset, double res_mult, double u_mult) { double res_max = 0.0, res_abs; @@ -226,7 +227,7 @@ static void residual_add_line_s2_c(size_t linesize, double *dst, double *dst_max res = -rhs[i]; for (int j = 0; j < ARRAY_ELEMS(u_vals); j++) - res += u_vals[j] * diff_coeffs[j][i]; + res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i]; dst[i] = u_mult * u[i] + res_mult * res; res_abs = fabs(res); @@ -241,24 +242,21 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre ResidualCalcInternal *priv = arg; ResidualCalcTask *task = &priv->task; - const double *diff_coeffs[MG2D_DIFF_COEFF_NB]; + const double *diff_coeffs = task->diff_coeffs + job_idx * task->diff_coeffs_stride; double *dst = task->dst + job_idx * task->dst_stride; - for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++) - diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride; - if (task->u_mult == 0.0) { priv->residual_line_calc(task->size[0], dst, priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, - diff_coeffs, task->res_mult); + diff_coeffs, task->diff_coeffs_offset, task->res_mult); } else { priv->residual_line_add(task->size[0], dst, priv->residual_max + thread_idx * priv->calc_blocksize, task->u_stride, task->u + job_idx * task->u_stride, task->rhs + job_idx * task->rhs_stride, - diff_coeffs, task->res_mult, task->u_mult); + diff_coeffs, task->diff_coeffs_offset, task->res_mult, task->u_mult); } if (task->reflect & (1 << MG2D_BOUNDARY_0L)) { @@ -286,8 +284,8 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], double *dst, ptrdiff_t dst_stride, const double *u, ptrdiff_t u_stride, const double *rhs, ptrdiff_t rhs_stride, - const double * const diff_coeffs[MG2D_DIFF_COEFF_NB], - ptrdiff_t diff_coeffs_stride, + const double *diff_coeffs, ptrdiff_t diff_coeffs_stride, + ptrdiff_t diff_coeffs_offset, double u_mult, double res_mult, int reflect, size_t reflect_dist) { @@ -307,6 +305,7 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2], task->rhs_stride = rhs_stride; task->diff_coeffs = diff_coeffs; task->diff_coeffs_stride = diff_coeffs_stride; + task->diff_coeffs_offset = diff_coeffs_offset; task->u_mult = u_mult; task->res_mult = res_mult; task->reflect = reflect; @@ -333,8 +332,8 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx) priv->residual_line_add = residual_add_line_s1_c; #if HAVE_NASM if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) { - priv->residual_line_calc = mg2di_residual_calc_line_s1_fma3; - priv->residual_line_add = mg2di_residual_add_line_s1_fma3; + priv->residual_line_calc = mg2di_residual_line_calc_s1_fma3; + priv->residual_line_add = mg2di_residual_line_add_s1_fma3; priv->calc_blocksize = 4; } #endif @@ -344,8 +343,8 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx) priv->residual_line_add = residual_add_line_s2_c; #if HAVE_NASM if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) { - priv->residual_line_calc = mg2di_residual_calc_line_s2_fma3; - priv->residual_line_add = mg2di_residual_add_line_s2_fma3; + priv->residual_line_calc = mg2di_residual_line_calc_s2_fma3; + priv->residual_line_add = mg2di_residual_line_add_s2_fma3; priv->calc_blocksize = 4; } #endif |