about | summary | refs | log | tree | commit | diff
path: root/residual_calc.c
diff options
context:
space:
mode:
Diffstat (limited to 'residual_calc.c')
-rw-r--r-- residual_calc.c 53
1 files changed, 26 insertions, 27 deletions
diff --git a/residual_calc.c b/residual_calc.c
index c06c966..948655e 100644
--- a/residual_calc.c
+++ b/residual_calc.c
@@ -33,11 +33,11 @@
typedef void ResidualLineCalc(size_t linesize, double *dst, double *dst_max,
ptrdiff_t u_stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ const double *diff_coeffs, ptrdiff_t diff_coeffs_offset,
double res_mult);
typedef void ResidualLineAdd (size_t linesize, double *dst, double *dst_max,
ptrdiff_t u_stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ const double *diff_coeffs, ptrdiff_t diff_coeffs_offset,
double res_mult, double u_mult);
typedef struct ResidualCalcTask {
@@ -52,8 +52,9 @@ typedef struct ResidualCalcTask {
const double *rhs;
ptrdiff_t rhs_stride;
- const double * const *diff_coeffs;
+ const double *diff_coeffs;
ptrdiff_t diff_coeffs_stride;
+ ptrdiff_t diff_coeffs_offset;
double u_mult;
double res_mult;
@@ -74,10 +75,10 @@ struct ResidualCalcInternal {
};
#if HAVE_NASM
-ResidualLineCalc mg2di_residual_calc_line_s1_fma3;
-ResidualLineCalc mg2di_residual_calc_line_s2_fma3;
-ResidualLineAdd mg2di_residual_add_line_s1_fma3;
-ResidualLineAdd mg2di_residual_add_line_s2_fma3;
+ResidualLineCalc mg2di_residual_line_calc_s1_fma3;
+ResidualLineCalc mg2di_residual_line_calc_s2_fma3;
+ResidualLineAdd mg2di_residual_line_add_s1_fma3;
+ResidualLineAdd mg2di_residual_line_add_s2_fma3;
#endif
static void
@@ -142,7 +143,7 @@ derivatives_calc_s2(double *dst, const double *u, ptrdiff_t stride)
static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t u_stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ const double *diff_coeffs, ptrdiff_t diff_coeffs_offset,
double res_mult)
{
double res_max = 0.0, res_abs;
@@ -154,7 +155,7 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
- res += u_vals[j] * diff_coeffs[j][i];
+ res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i];
dst[i] = res_mult * res;
res_abs = fabs(res);
@@ -166,7 +167,7 @@ static void residual_calc_line_s1_c(size_t linesize, double *dst, double *dst_ma
static void residual_add_line_s1_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t u_stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ const double *diff_coeffs, ptrdiff_t diff_coeffs_offset,
double res_mult, double u_mult)
{
double res_max = 0.0, res_abs;
@@ -178,7 +179,7 @@ static void residual_add_line_s1_c(size_t linesize, double *dst, double *dst_max
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
- res += u_vals[j] * diff_coeffs[j][i];
+ res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i];
dst[i] = u_mult * u[i] + res_mult * res;
res_abs = fabs(res);
@@ -190,7 +191,7 @@ static void residual_add_line_s1_c(size_t linesize, double *dst, double *dst_max
static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t u_stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ const double *diff_coeffs, ptrdiff_t diff_coeffs_offset,
double res_mult)
{
double res_max = 0.0, res_abs;
@@ -202,7 +203,7 @@ static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_ma
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
- res += u_vals[j] * diff_coeffs[j][i];
+ res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i];
dst[i] = res_mult * res;
res_abs = fabs(res);
@@ -214,7 +215,7 @@ static void residual_calc_line_s2_c(size_t linesize, double *dst, double *dst_ma
static void residual_add_line_s2_c(size_t linesize, double *dst, double *dst_max,
ptrdiff_t u_stride, const double *u, const double *rhs,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
+ const double *diff_coeffs, ptrdiff_t diff_coeffs_offset,
double res_mult, double u_mult)
{
double res_max = 0.0, res_abs;
@@ -226,7 +227,7 @@ static void residual_add_line_s2_c(size_t linesize, double *dst, double *dst_max
res = -rhs[i];
for (int j = 0; j < ARRAY_ELEMS(u_vals); j++)
- res += u_vals[j] * diff_coeffs[j][i];
+ res += u_vals[j] * diff_coeffs[j * diff_coeffs_offset + i];
dst[i] = u_mult * u[i] + res_mult * res;
res_abs = fabs(res);
@@ -241,24 +242,21 @@ static int residual_calc_task(void *arg, unsigned int job_idx, unsigned int thre
ResidualCalcInternal *priv = arg;
ResidualCalcTask *task = &priv->task;
- const double *diff_coeffs[MG2D_DIFF_COEFF_NB];
+ const double *diff_coeffs = task->diff_coeffs + job_idx * task->diff_coeffs_stride;
double *dst = task->dst + job_idx * task->dst_stride;
- for (int i = 0; i < ARRAY_ELEMS(diff_coeffs); i++)
- diff_coeffs[i] = task->diff_coeffs[i] + job_idx * task->diff_coeffs_stride;
-
if (task->u_mult == 0.0) {
priv->residual_line_calc(task->size[0], dst,
priv->residual_max + thread_idx * priv->calc_blocksize,
task->u_stride, task->u + job_idx * task->u_stride,
task->rhs + job_idx * task->rhs_stride,
- diff_coeffs, task->res_mult);
+ diff_coeffs, task->diff_coeffs_offset, task->res_mult);
} else {
priv->residual_line_add(task->size[0], dst,
priv->residual_max + thread_idx * priv->calc_blocksize,
task->u_stride, task->u + job_idx * task->u_stride,
task->rhs + job_idx * task->rhs_stride,
- diff_coeffs, task->res_mult, task->u_mult);
+ diff_coeffs, task->diff_coeffs_offset, task->res_mult, task->u_mult);
}
if (task->reflect & (1 << MG2D_BOUNDARY_0L)) {
@@ -286,8 +284,8 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
double *dst, ptrdiff_t dst_stride,
const double *u, ptrdiff_t u_stride,
const double *rhs, ptrdiff_t rhs_stride,
- const double * const diff_coeffs[MG2D_DIFF_COEFF_NB],
- ptrdiff_t diff_coeffs_stride,
+ const double *diff_coeffs, ptrdiff_t diff_coeffs_stride,
+ ptrdiff_t diff_coeffs_offset,
double u_mult, double res_mult,
int reflect, size_t reflect_dist)
{
@@ -307,6 +305,7 @@ int mg2di_residual_calc(ResidualCalcContext *ctx, size_t size[2],
task->rhs_stride = rhs_stride;
task->diff_coeffs = diff_coeffs;
task->diff_coeffs_stride = diff_coeffs_stride;
+ task->diff_coeffs_offset = diff_coeffs_offset;
task->u_mult = u_mult;
task->res_mult = res_mult;
task->reflect = reflect;
@@ -333,8 +332,8 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx)
priv->residual_line_add = residual_add_line_s1_c;
#if HAVE_NASM
if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
- priv->residual_line_calc = mg2di_residual_calc_line_s1_fma3;
- priv->residual_line_add = mg2di_residual_add_line_s1_fma3;
+ priv->residual_line_calc = mg2di_residual_line_calc_s1_fma3;
+ priv->residual_line_add = mg2di_residual_line_add_s1_fma3;
priv->calc_blocksize = 4;
}
#endif
@@ -344,8 +343,8 @@ int mg2di_residual_calc_init(ResidualCalcContext *ctx)
priv->residual_line_add = residual_add_line_s2_c;
#if HAVE_NASM
if (ctx->cpuflags & MG2DI_CPU_FLAG_FMA3) {
- priv->residual_line_calc = mg2di_residual_calc_line_s2_fma3;
- priv->residual_line_add = mg2di_residual_add_line_s2_fma3;
+ priv->residual_line_calc = mg2di_residual_line_calc_s2_fma3;
+ priv->residual_line_add = mg2di_residual_line_add_s2_fma3;
priv->calc_blocksize = 4;
}
#endif