From f52486a2382b5e1036b608d663ccece5d2de220d Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 9 Apr 2019 09:45:16 +0200 Subject: egs: add higher-order finite difference operators --- ell_grid_solve.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 220 insertions(+), 3 deletions(-) (limited to 'ell_grid_solve.c') diff --git a/ell_grid_solve.c b/ell_grid_solve.c index dad8861..d796b26 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -44,6 +44,8 @@ static const double relax_factors[FD_STENCIL_MAX] = { [0] = 1.0 / 5, [1] = 1.0 / 7, + [2] = 1.0 / 7, + [3] = 1.0 / 7, }; typedef struct EGSRelaxInternal { @@ -140,6 +142,22 @@ static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = { [MG2D_DIFF_COEFF_02] = 12.0, [MG2D_DIFF_COEFF_11] = 144.0, }, + { + [MG2D_DIFF_COEFF_00] = 1.0, + [MG2D_DIFF_COEFF_10] = 60.0, + [MG2D_DIFF_COEFF_01] = 60.0, + [MG2D_DIFF_COEFF_20] = 90.0, + [MG2D_DIFF_COEFF_02] = 90.0, + [MG2D_DIFF_COEFF_11] = 3600.0, + }, + { + [MG2D_DIFF_COEFF_00] = 1.0, + [MG2D_DIFF_COEFF_10] = 840.0, + [MG2D_DIFF_COEFF_01] = 840.0, + [MG2D_DIFF_COEFF_20] = 5040.0, + [MG2D_DIFF_COEFF_02] = 5040.0, + [MG2D_DIFF_COEFF_11] = SQR(840.0), + }, }; static void boundaries_sync(EGSContext *ctx, NDArray *a_dst) @@ -227,7 +245,9 @@ static void boundaries_apply_reflect(double *dst, const ptrdiff_t dst_stride[2], /* FIXME we currently use 1st-order forward differences in all cases, * since centered and/or higher-order FDs seem to be unstable * this may be suboptimal and should be investigated closer */ -static const double falloff_bnd_fd_factors[][FD_STENCIL_MAX * 2 + 1] = { +static const double falloff_bnd_fd_factors[][2] = { + { 1.0 / 2.0, -1.0 / 2.0, }, + { 1.0 / 2.0, -1.0 / 2.0, }, { 1.0 / 2.0, -1.0 / 2.0, }, { 1.0 / 2.0, -1.0 / 2.0, }, //{ -1.0, 8.0, 0.0, -8.0, 1.0 }, @@ -248,7 +268,7 @@ static void boundaries_apply_falloff(double *dst, const ptrdiff_t dst_stride[2], double val = row[-(ptrdiff_t)ctx->fd_stencil * dst_stride[1]] / r * (x / r); - for (int i = 1; i < ctx->fd_stencil * 2 + 1; i++) + for (int i = 1; i < ARRAY_ELEMS(falloff_bnd_fd_factors[0]); i++) val += dst[(bnd_layer - i) * dst_stride[1] + bnd_idx * dst_stride[0]] * falloff_bnd_fd_factors[ctx->fd_stencil - 1][i] / ctx->step[0]; *row = -val * (ctx->step[0] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0]); @@ -574,6 +594,201 @@ static void fill_mat_s2(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_str mat_row[(-2 - 2 * row_stride) * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; } +static void fill_mat_s3(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_stride, + NDArray **diff_coeffs, ptrdiff_t idx_src) +{ + mat_row[0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; + + mat_row[ 3 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[ 2 * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[ 1 * mat_stride] += 45.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[-1 * mat_stride] += -45.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[-2 * mat_stride] += 9.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[-3 * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + + mat_row[ 3 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[ 2 * row_stride * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[ 1 * row_stride * mat_stride] += 45.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[-1 * row_stride * mat_stride] += -45.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[-2 * row_stride * mat_stride] += 9.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[-3 * row_stride * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + + mat_row[ 3 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[ 2 * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[ 1 * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[ 0 * mat_stride] += -245.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[-1 * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[-2 * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[-3 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + + mat_row[ 3 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[ 2 * row_stride * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[ 1 * row_stride * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[ 0 * row_stride * mat_stride] += -245.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[-1 * row_stride * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[-2 * row_stride * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[-3 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + + mat_row[( 3 + 3 * row_stride) * mat_stride] += (1.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 + 2 * row_stride) * mat_stride] += (1.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 + 1 * row_stride) * mat_stride] += (1.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 - 1 * row_stride) * mat_stride] += (1.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 - 2 * row_stride) * mat_stride] += (1.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 - 3 * row_stride) * mat_stride] += (1.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[( 2 + 3 * row_stride) * mat_stride] += (-9.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 + 2 * row_stride) * mat_stride] += (-9.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 + 1 * row_stride) * mat_stride] += (-9.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 - 1 * row_stride) * mat_stride] += (-9.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 - 2 * row_stride) * mat_stride] += (-9.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 - 3 * row_stride) * mat_stride] += (-9.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[( 1 + 3 * row_stride) * mat_stride] += (45.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 + 2 * row_stride) * mat_stride] += (45.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 + 1 * row_stride) * mat_stride] += (45.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 - 1 * row_stride) * mat_stride] += (45.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 - 2 * row_stride) * mat_stride] += (45.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 - 3 * row_stride) * mat_stride] += (45.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[(-1 + 3 * row_stride) * mat_stride] += (-45.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 + 2 * row_stride) * mat_stride] += (-45.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 + 1 * row_stride) * mat_stride] += (-45.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 - 1 * row_stride) * mat_stride] += (-45.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 - 2 * row_stride) * mat_stride] += (-45.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 - 3 * row_stride) * mat_stride] += (-45.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[(-2 + 3 * row_stride) * mat_stride] += (9.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 + 2 * row_stride) * mat_stride] += (9.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 + 1 * row_stride) * mat_stride] += (9.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 - 1 * row_stride) * mat_stride] += (9.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 - 2 * row_stride) * mat_stride] += (9.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 - 3 * row_stride) * mat_stride] += (9.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[(-3 + 3 * row_stride) * mat_stride] += (-1.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 + 2 * row_stride) * mat_stride] += (-1.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 + 1 * row_stride) * mat_stride] += (-1.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 - 1 * row_stride) * mat_stride] += (-1.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 - 2 * row_stride) * mat_stride] += (-1.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 - 3 * row_stride) * mat_stride] += (-1.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; +} + +static void fill_mat_s4(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_stride, + NDArray **diff_coeffs, ptrdiff_t idx_src) +{ + mat_row[0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; + + mat_row[ 4 * mat_stride] += -3.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[ 3 * mat_stride] += 32.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[ 2 * mat_stride] += -168.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[ 1 * mat_stride] += 672.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[-1 * mat_stride] += -672.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[-2 * mat_stride] += 168.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[-3 * mat_stride] += -32.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + mat_row[-4 * mat_stride] += 3.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src]; + + mat_row[ 4 * row_stride * mat_stride] += -3.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[ 3 * row_stride * mat_stride] += 32.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[ 2 * row_stride * mat_stride] += -168.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[ 1 * row_stride * mat_stride] += 672.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[-1 * row_stride * mat_stride] += -672.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[-2 * row_stride * mat_stride] += 168.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[-3 * row_stride * mat_stride] += -32.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + mat_row[-4 * row_stride * mat_stride] += 3.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src]; + + mat_row[ 4 * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[ 3 * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[ 2 * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[ 1 * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[ 0 * mat_stride] += -14350.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[-1 * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[-2 * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[-3 * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + mat_row[-4 * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src]; + + mat_row[ 4 * row_stride * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[ 3 * row_stride * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[ 2 * row_stride * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[ 1 * row_stride * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[ 0 * row_stride * mat_stride] += -14350.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[-1 * row_stride * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[-2 * row_stride * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[-3 * row_stride * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + mat_row[-4 * row_stride * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src]; + + mat_row[( 4 + 4 * row_stride) * mat_stride] += (-3.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 4 + 3 * row_stride) * mat_stride] += (-3.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 4 + 2 * row_stride) * mat_stride] += (-3.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 4 + 1 * row_stride) * mat_stride] += (-3.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 4 - 1 * row_stride) * mat_stride] += (-3.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 4 - 2 * row_stride) * mat_stride] += (-3.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 4 - 3 * row_stride) * mat_stride] += (-3.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 4 - 4 * row_stride) * mat_stride] += (-3.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[( 3 + 4 * row_stride) * mat_stride] += (32.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 + 3 * row_stride) * mat_stride] += (32.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 + 2 * row_stride) * mat_stride] += (32.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 + 1 * row_stride) * mat_stride] += (32.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 - 1 * row_stride) * mat_stride] += (32.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 - 2 * row_stride) * mat_stride] += (32.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 - 3 * row_stride) * mat_stride] += (32.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 3 - 4 * row_stride) * mat_stride] += (32.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[( 2 + 4 * row_stride) * mat_stride] += (-168.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 + 3 * row_stride) * mat_stride] += (-168.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 + 2 * row_stride) * mat_stride] += (-168.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 + 1 * row_stride) * mat_stride] += (-168.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 - 1 * row_stride) * mat_stride] += (-168.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 - 2 * row_stride) * mat_stride] += (-168.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 - 3 * row_stride) * mat_stride] += (-168.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 2 - 4 * row_stride) * mat_stride] += (-168.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[( 1 + 4 * row_stride) * mat_stride] += (672.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 + 3 * row_stride) * mat_stride] += (672.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 + 2 * row_stride) * mat_stride] += (672.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 + 1 * row_stride) * mat_stride] += (672.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 - 1 * row_stride) * mat_stride] += (672.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 - 2 * row_stride) * mat_stride] += (672.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 - 3 * row_stride) * mat_stride] += (672.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[( 1 - 4 * row_stride) * mat_stride] += (672.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[(-1 + 4 * row_stride) * mat_stride] += (-672.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 + 3 * row_stride) * mat_stride] += (-672.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 + 2 * row_stride) * mat_stride] += (-672.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 + 1 * row_stride) * mat_stride] += (-672.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 - 1 * row_stride) * mat_stride] += (-672.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 - 2 * row_stride) * mat_stride] += (-672.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 - 3 * row_stride) * mat_stride] += (-672.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-1 - 4 * row_stride) * mat_stride] += (-672.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[(-2 + 4 * row_stride) * mat_stride] += (168.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 + 3 * row_stride) * mat_stride] += (168.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 + 2 * row_stride) * mat_stride] += (168.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 + 1 * row_stride) * mat_stride] += (168.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 - 1 * row_stride) * mat_stride] += (168.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 - 2 * row_stride) * mat_stride] += (168.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 - 3 * row_stride) * mat_stride] += (168.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-2 - 4 * row_stride) * mat_stride] += (168.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[(-3 + 4 * row_stride) * mat_stride] += (-32.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 + 3 * row_stride) * mat_stride] += (-32.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 + 2 * row_stride) * mat_stride] += (-32.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 + 1 * row_stride) * mat_stride] += (-32.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 - 1 * row_stride) * mat_stride] += (-32.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 - 2 * row_stride) * mat_stride] += (-32.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 - 3 * row_stride) * mat_stride] += (-32.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-3 - 4 * row_stride) * mat_stride] += (-32.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + + mat_row[(-4 + 4 * row_stride) * mat_stride] += (3.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-4 + 3 * row_stride) * mat_stride] += (3.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-4 + 2 * row_stride) * mat_stride] += (3.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-4 + 1 * row_stride) * mat_stride] += (3.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-4 - 1 * row_stride) * mat_stride] += (3.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-4 - 2 * row_stride) * mat_stride] += (3.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-4 - 3 * row_stride) * mat_stride] += (3.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + mat_row[(-4 - 4 * row_stride) * mat_stride] += (3.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src]; + +} + static void mat_fill_row(EGSContext *ctx, double *scratch_line, ptrdiff_t idx1, ptrdiff_t idx0) { @@ -707,7 +922,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line, const double r = sqrt(SQR(x) + SQR(y)); double val = dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]]; - for (int i = 1; i < ctx->fd_stencil * 2 + 1; i++) { + for (int i = 1; i < ARRAY_ELEMS(falloff_bnd_fd_factors[0]); i++) { dst[dir * (bnd_layer - i) * bnd_stride[1] + bnd_idx * bnd_stride[0]] -= val * falloff_bnd_fd_factors[ctx->fd_stencil - 1][i] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0]; } @@ -1021,6 +1236,8 @@ int mg2di_egs_init(EGSContext *ctx, int flags) switch (ctx->fd_stencil) { case 1: priv->e.fill_mat = fill_mat_s1; break; case 2: priv->e.fill_mat = fill_mat_s2; break; + case 3: priv->e.fill_mat = fill_mat_s3; break; + case 4: priv->e.fill_mat = fill_mat_s4; break; default: mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n", ctx->fd_stencil); -- cgit v1.2.3