From 373dbd15f216c2ce97396af37ea3f7cc5d39455c Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 9 Apr 2019 09:45:16 +0200 Subject: egs: add higher-order finite difference operators --- ell_grid_solve.c | 225 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 221 insertions(+), 4 deletions(-) (limited to 'ell_grid_solve.c') diff --git a/ell_grid_solve.c b/ell_grid_solve.c index dfc48c6..e334f58 100644 --- a/ell_grid_solve.c +++ b/ell_grid_solve.c @@ -111,6 +111,22 @@ static const double fd_denoms[][MG2D_DIFF_COEFF_NB] = { [MG2D_DIFF_COEFF_02] = 12.0, [MG2D_DIFF_COEFF_11] = 144.0, }, + { + [MG2D_DIFF_COEFF_00] = 1.0, + [MG2D_DIFF_COEFF_10] = 60.0, + [MG2D_DIFF_COEFF_01] = 60.0, + [MG2D_DIFF_COEFF_20] = 90.0, + [MG2D_DIFF_COEFF_02] = 90.0, + [MG2D_DIFF_COEFF_11] = 3600.0, + }, + { + [MG2D_DIFF_COEFF_00] = 1.0, + [MG2D_DIFF_COEFF_10] = 840.0, + [MG2D_DIFF_COEFF_01] = 840.0, + [MG2D_DIFF_COEFF_20] = 5040.0, + [MG2D_DIFF_COEFF_02] = 5040.0, + [MG2D_DIFF_COEFF_11] = SQR(840.0), + }, }; static void residual_calc(EGSContext *ctx) @@ -165,7 +181,9 @@ static void boundaries_apply_reflect(double *dst, const ptrdiff_t dst_stride[2], /* FIXME we currently use 1st-order forward differences in all cases, * since centered and/or higher-order FDs seem to be unstable * this may be suboptimal and should be investigated closer */ -static const double falloff_bnd_fd_factors[][FD_STENCIL_MAX * 2 + 1] = { +static const double falloff_bnd_fd_factors[][2] = { + { 1.0 / 2.0, -1.0 / 2.0, }, + { 1.0 / 2.0, -1.0 / 2.0, }, { 1.0 / 2.0, -1.0 / 2.0, }, { 1.0 / 2.0, -1.0 / 2.0, }, //{ -1.0, 8.0, 0.0, -8.0, 1.0 }, @@ -185,7 +203,7 @@ static void boundaries_apply_falloff(double *dst, const ptrdiff_t dst_stride[2], double val = row[-(ptrdiff_t)ctx->fd_stencil * dst_stride[1]] / r * (x / r); - for (int i = 1; i < ctx->fd_stencil * 2 + 1; i++) + for (int i = 1; i < ARRAY_ELEMS(falloff_bnd_fd_factors); i++) val += dst[(bnd_layer - i) * dst_stride[1] + bnd_idx * dst_stride[0]] * falloff_bnd_fd_factors[ctx->fd_stencil - 1][i] / ctx->step[0]; *row = -val * (ctx->step[0] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0]); @@ -405,6 +423,203 @@ static void fill_mat_s2(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_str mat_row[(-2 - 2 * row_stride) * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; } +static void fill_mat_s3(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_stride, + NDArray **diff_coeffs, double *fd_factors, + ptrdiff_t idx_src) +{ + mat_row[0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; + + mat_row[ 3 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[ 2 * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[ 1 * mat_stride] += 45.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[-1 * mat_stride] += -45.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[-2 * mat_stride] += 9.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[-3 * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + + mat_row[ 3 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[ 2 * row_stride * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[ 1 * row_stride * mat_stride] += 45.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[-1 * row_stride * mat_stride] += -45.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[-2 * row_stride * mat_stride] += 9.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[-3 * row_stride * mat_stride] += -1.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + + mat_row[ 3 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[ 2 * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[ 1 * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[ 0 * mat_stride] += -245.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[-1 * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[-2 * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[-3 * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + + mat_row[ 3 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[ 2 * row_stride * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[ 1 * row_stride * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[ 0 * row_stride * mat_stride] += -245.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[-1 * row_stride * mat_stride] += 135.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[-2 * row_stride * mat_stride] += -13.5 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[-3 * row_stride * mat_stride] += 1.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + + mat_row[( 3 + 3 * row_stride) * mat_stride] += (1.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 + 2 * row_stride) * mat_stride] += (1.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 + 1 * row_stride) * mat_stride] += (1.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 - 1 * row_stride) * mat_stride] += (1.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 - 2 * row_stride) * mat_stride] += (1.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 - 3 * row_stride) * mat_stride] += (1.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[( 2 + 3 * row_stride) * mat_stride] += (-9.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 + 2 * row_stride) * mat_stride] += (-9.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 + 1 * row_stride) * mat_stride] += (-9.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 - 1 * row_stride) * mat_stride] += (-9.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 - 2 * row_stride) * mat_stride] += (-9.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 - 3 * row_stride) * mat_stride] += (-9.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[( 1 + 3 * row_stride) * mat_stride] += (45.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 + 2 * row_stride) * mat_stride] += (45.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 + 1 * row_stride) * mat_stride] += (45.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 - 1 * row_stride) * mat_stride] += (45.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 - 2 * row_stride) * mat_stride] += (45.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 - 3 * row_stride) * mat_stride] += (45.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[(-1 + 3 * row_stride) * mat_stride] += (-45.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 + 2 * row_stride) * mat_stride] += (-45.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 + 1 * row_stride) * mat_stride] += (-45.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 - 1 * row_stride) * mat_stride] += (-45.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 - 2 * row_stride) * mat_stride] += (-45.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 - 3 * row_stride) * mat_stride] += (-45.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[(-2 + 3 * row_stride) * mat_stride] += (9.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 + 2 * row_stride) * mat_stride] += (9.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 + 1 * row_stride) * mat_stride] += (9.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 - 1 * row_stride) * mat_stride] += (9.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 - 2 * row_stride) * mat_stride] += (9.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 - 3 * row_stride) * mat_stride] += (9.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[(-3 + 3 * row_stride) * mat_stride] += (-1.0) * ( 1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 + 2 * row_stride) * mat_stride] += (-1.0) * ( -9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 + 1 * row_stride) * mat_stride] += (-1.0) * ( 45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 - 1 * row_stride) * mat_stride] += (-1.0) * (-45.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 - 2 * row_stride) * mat_stride] += (-1.0) * ( 9.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 - 3 * row_stride) * mat_stride] += (-1.0) * ( -1.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; +} + +static void fill_mat_s4(double *mat_row, ptrdiff_t mat_stride, ptrdiff_t row_stride, + NDArray **diff_coeffs, double *fd_factors, + ptrdiff_t idx_src) +{ + mat_row[0] += diff_coeffs[MG2D_DIFF_COEFF_00]->data[idx_src]; + + mat_row[ 4 * mat_stride] += -3.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[ 3 * mat_stride] += 32.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[ 2 * mat_stride] += -168.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[ 1 * mat_stride] += 672.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[-1 * mat_stride] += -672.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[-2 * mat_stride] += 168.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[-3 * mat_stride] += -32.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + mat_row[-4 * mat_stride] += 3.0 * diff_coeffs[MG2D_DIFF_COEFF_10]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_10]; + + mat_row[ 4 * row_stride * mat_stride] += -3.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[ 3 * row_stride * mat_stride] += 32.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[ 2 * row_stride * mat_stride] += -168.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[ 1 * row_stride * mat_stride] += 672.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[-1 * row_stride * mat_stride] += -672.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[-2 * row_stride * mat_stride] += 168.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[-3 * row_stride * mat_stride] += -32.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + mat_row[-4 * row_stride * mat_stride] += 3.0 * diff_coeffs[MG2D_DIFF_COEFF_01]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_01]; + + mat_row[ 4 * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[ 3 * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[ 2 * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[ 1 * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[ 0 * mat_stride] += -14350.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[-1 * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[-2 * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[-3 * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + mat_row[-4 * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_20]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_20]; + + mat_row[ 4 * row_stride * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[ 3 * row_stride * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[ 2 * row_stride * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[ 1 * row_stride * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[ 0 * row_stride * mat_stride] += -14350.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[-1 * row_stride * mat_stride] += 8064.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[-2 * row_stride * mat_stride] += -1008.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[-3 * row_stride * mat_stride] += 128.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + mat_row[-4 * row_stride * mat_stride] += -9.0 * diff_coeffs[MG2D_DIFF_COEFF_02]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_02]; + + mat_row[( 4 + 4 * row_stride) * mat_stride] += (-3.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 4 + 3 * row_stride) * mat_stride] += (-3.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 4 + 2 * row_stride) * mat_stride] += (-3.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 4 + 1 * row_stride) * mat_stride] += (-3.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 4 - 1 * row_stride) * mat_stride] += (-3.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 4 - 2 * row_stride) * mat_stride] += (-3.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 4 - 3 * row_stride) * mat_stride] += (-3.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 4 - 4 * row_stride) * mat_stride] += (-3.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[( 3 + 4 * row_stride) * mat_stride] += (32.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 + 3 * row_stride) * mat_stride] += (32.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 + 2 * row_stride) * mat_stride] += (32.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 + 1 * row_stride) * mat_stride] += (32.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 - 1 * row_stride) * mat_stride] += (32.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 - 2 * row_stride) * mat_stride] += (32.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 - 3 * row_stride) * mat_stride] += (32.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 3 - 4 * row_stride) * mat_stride] += (32.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[( 2 + 4 * row_stride) * mat_stride] += (-168.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 + 3 * row_stride) * mat_stride] += (-168.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 + 2 * row_stride) * mat_stride] += (-168.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 + 1 * row_stride) * mat_stride] += (-168.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 - 1 * row_stride) * mat_stride] += (-168.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 - 2 * row_stride) * mat_stride] += (-168.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 - 3 * row_stride) * mat_stride] += (-168.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 2 - 4 * row_stride) * mat_stride] += (-168.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[( 1 + 4 * row_stride) * mat_stride] += (672.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 + 3 * row_stride) * mat_stride] += (672.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 + 2 * row_stride) * mat_stride] += (672.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 + 1 * row_stride) * mat_stride] += (672.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 - 1 * row_stride) * mat_stride] += (672.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 - 2 * row_stride) * mat_stride] += (672.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 - 3 * row_stride) * mat_stride] += (672.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[( 1 - 4 * row_stride) * mat_stride] += (672.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[(-1 + 4 * row_stride) * mat_stride] += (-672.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 + 3 * row_stride) * mat_stride] += (-672.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 + 2 * row_stride) * mat_stride] += (-672.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 + 1 * row_stride) * mat_stride] += (-672.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 - 1 * row_stride) * mat_stride] += (-672.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 - 2 * row_stride) * mat_stride] += (-672.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 - 3 * row_stride) * mat_stride] += (-672.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-1 - 4 * row_stride) * mat_stride] += (-672.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[(-2 + 4 * row_stride) * mat_stride] += (168.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 + 3 * row_stride) * mat_stride] += (168.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 + 2 * row_stride) * mat_stride] += (168.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 + 1 * row_stride) * mat_stride] += (168.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 - 1 * row_stride) * mat_stride] += (168.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 - 2 * row_stride) * mat_stride] += (168.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 - 3 * row_stride) * mat_stride] += (168.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-2 - 4 * row_stride) * mat_stride] += (168.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[(-3 + 4 * row_stride) * mat_stride] += (-32.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 + 3 * row_stride) * mat_stride] += (-32.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 + 2 * row_stride) * mat_stride] += (-32.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 + 1 * row_stride) * mat_stride] += (-32.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 - 1 * row_stride) * mat_stride] += (-32.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 - 2 * row_stride) * mat_stride] += (-32.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 - 3 * row_stride) * mat_stride] += (-32.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-3 - 4 * row_stride) * mat_stride] += (-32.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + + mat_row[(-4 + 4 * row_stride) * mat_stride] += (3.0) * ( -3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-4 + 3 * row_stride) * mat_stride] += (3.0) * ( 32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-4 + 2 * row_stride) * mat_stride] += (3.0) * (-168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-4 + 1 * row_stride) * mat_stride] += (3.0) * ( 672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-4 - 1 * row_stride) * mat_stride] += (3.0) * (-672.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-4 - 2 * row_stride) * mat_stride] += (3.0) * ( 168.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-4 - 3 * row_stride) * mat_stride] += (3.0) * ( -32.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + mat_row[(-4 - 4 * row_stride) * mat_stride] += (3.0) * ( 3.0) * diff_coeffs[MG2D_DIFF_COEFF_11]->data[idx_src] * fd_factors[MG2D_DIFF_COEFF_11]; + +} + static void mat_fill_row(EGSContext *ctx, double *scratch_line, ptrdiff_t idx1, ptrdiff_t idx0) { @@ -538,7 +753,7 @@ static void mat_fill_row(EGSContext *ctx, double *scratch_line, const double r = sqrt(SQR(x) + SQR(y)); double val = dst[dir * bnd_layer * bnd_stride[1] + bnd_idx * bnd_stride[0]]; - for (int i = 1; i < ctx->fd_stencil * 2 + 1; i++) { + for (int i = 1; i < ARRAY_ELEMS(falloff_bnd_fd_factors); i++) { dst[dir * (bnd_layer - i) * bnd_stride[1] + bnd_idx * bnd_stride[0]] -= val * falloff_bnd_fd_factors[ctx->fd_stencil - 1][i] / falloff_bnd_fd_factors[ctx->fd_stencil - 1][0]; } @@ -732,6 +947,8 @@ int mg2di_egs_init(EGSContext *ctx, int flags) switch (ctx->fd_stencil) { case 1: e->fill_mat = fill_mat_s1; break; case 2: e->fill_mat = fill_mat_s2; break; + case 3: e->fill_mat = fill_mat_s3; break; + case 4: e->fill_mat = fill_mat_s4; break; default: mg2di_log(&ctx->logger, 0, "Invalid finite difference stencil: %zd\n", ctx->fd_stencil); @@ -873,7 +1090,7 @@ static int arrays_alloc(EGSContext *ctx, const size_t domain_size[2]) if (!e->mat || !e->mat_f || !e->rhs || !e->rhs_mat || !e->rhs_factor || !e->x || !e->ipiv) return -ENOMEM; - ret = mg2di_bicgstab_context_alloc(&e->bicgstab, e->N, 64); + ret = mg2di_bicgstab_context_alloc(&e->bicgstab, e->N, 24); if (ret < 0) return ret; -- cgit v1.2.3