diff options
author | Anton Khirnov <anton@khirnov.net> | 2016-03-24 10:14:18 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2016-03-24 10:14:18 +0100 |
commit | e00a0cc3de18a7bcd8e53761bed037ec0a1dabec (patch) | |
tree | b1e332df8d7bcbe975eac1ffa051c71bc8447011 | |
parent | d9e0aa0a1a5e2b8026519940cddfc34713ba1234 (diff) |
Cartoonize.
-rw-r--r-- | src/Operators.c | 115 |
1 files changed, 81 insertions, 34 deletions
diff --git a/src/Operators.c b/src/Operators.c index fa78524..2fed466 100644 --- a/src/Operators.c +++ b/src/Operators.c @@ -1,5 +1,6 @@ #include "Operators.h" #include <assert.h> +#include <math.h> #include <stddef.h> #include <stdlib.h> @@ -30,7 +31,6 @@ void op_real_set_0(CCTK_REAL *restrict const varptr, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = 0.0; } @@ -43,7 +43,6 @@ op_real_set_1(CCTK_REAL *restrict const varptr, CCTK_REAL const fact0, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = fact0 * srcptr0[i]; } @@ -58,7 +57,6 @@ op_real_set_2(CCTK_REAL *restrict const varptr, CCTK_REAL const fact1, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = fact0 * srcptr0[i] + fact1 * srcptr1[i]; } @@ -75,7 +73,6 @@ op_real_set_3(CCTK_REAL *restrict const varptr, CCTK_REAL const fact2, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = fact0 * srcptr0[i] + fact1 * srcptr1[i] + fact2 * srcptr2[i]; } @@ -87,7 +84,6 @@ op_real_update_0(CCTK_REAL *restrict const varptr, CCTK_REAL const scale, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = scale * varptr[i]; } @@ -101,7 +97,6 @@ op_real_update_1(CCTK_REAL *restrict const varptr, CCTK_REAL const fact0, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = scale * varptr[i] + fact0 * srcptr0[i]; } @@ -117,7 +112,6 @@ op_real_update_2(CCTK_REAL *restrict const varptr, CCTK_REAL const fact1, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = scale * varptr[i] + fact0 * srcptr0[i] + fact1 * srcptr1[i]; } @@ -135,7 +129,6 @@ op_real_update_3(CCTK_REAL *restrict const varptr, CCTK_REAL const fact2, ptrdiff_t const npoints) { -#pragma omp parallel for for (ptrdiff_t i=0; i<npoints; ++i) { varptr[i] = scale * varptr[i] + @@ -164,10 +157,25 @@ MoL_LinearCombination(cGH const *const cctkGH, } // Determine grid variable size + double *y; + int y_idx = -1, i, j; int const dim = CCTK_GroupDimFromVarI(var); int ash[dim]; int const ierr = CCTK_GroupashVI(cctkGH, dim, ash, var); assert(!ierr); + + y = CCTK_VarDataPtr(cctkGH, 0, "grid::y"); + assert(y); + + for (i = 0; i < ash[1]; i++) { + if (fabs(y[CCTK_GFINDEX3D(cctkGH, 0, i, 0)]) < 1e-8) { + y_idx = i; + break; + } + } + if (y_idx < 0) + assert(0); + // TODO: check that all src variables have the same ash ptrdiff_t npoints = 1; for (int d=0; d<dim; ++d) { @@ -194,29 +202,48 @@ MoL_LinearCombination(cGH const *const cctkGH, // performance switch (nsrcs) { case 0: - op_real_set_0(varptr, npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_set_0(varptr + offset, ash[0]); + } break; case 1: - op_real_set_1(varptr, srcptrs[0], facts[0], npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_set_1(varptr + offset, srcptrs[0] + offset, facts[0], ash[0]); + } break; case 2: - op_real_set_2(varptr, - srcptrs[0], facts[0], srcptrs[1], facts[1], npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_set_2(varptr + offset, srcptrs[0] + offset, facts[0], + srcptrs[1] + offset, facts[1], ash[0]); + } break; case 3: - op_real_set_3(varptr, - srcptrs[0], facts[0], srcptrs[1], facts[1], - srcptrs[2], facts[2], npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_set_3(varptr + offset, + srcptrs[0] + offset, facts[0], srcptrs[1] + offset, facts[1], + srcptrs[2] + offset, facts[2], ash[0]); + } break; default: // Loop over all grid points #pragma omp parallel for - for (ptrdiff_t i=0; i<npoints; ++i) { - CCTK_REAL tmp = 0.0; - for (int n=0; n<nsrcs; ++n) { - tmp += facts[n] * srcptrs[n][i]; - } - varptr[i] = tmp; + for (i = 0; i < ash[2]; i++) { + for (j = 0; j < ash[0]; j++) { + int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i); + CCTK_REAL tmp = 0.0; + for (int n=0; n<nsrcs; ++n) { + tmp += facts[n] * srcptrs[n][idx]; + } + varptr[idx] = tmp; + } } break; } @@ -228,29 +255,49 @@ MoL_LinearCombination(cGH const *const cctkGH, // performance switch (nsrcs) { case 0: - op_real_update_0(varptr, scale, npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_update_0(varptr + offset, scale, ash[0]); + } break; case 1: - op_real_update_1(varptr, scale, srcptrs[0], facts[0], npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_update_1(varptr + offset, scale, srcptrs[0] + offset, facts[0], ash[0]); + } break; case 2: - op_real_update_2(varptr, scale, - srcptrs[0], facts[0], srcptrs[1], facts[1], npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_update_2(varptr + offset, scale, + srcptrs[0] + offset, facts[0], + srcptrs[1] + offset, facts[1], ash[0]); + } break; case 3: - op_real_update_3(varptr, scale, - srcptrs[0], facts[0], srcptrs[1], facts[1], - srcptrs[2], facts[2], npoints); +#pragma omp parallel for + for (i = 0; i < ash[2]; i++) { + int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i); + op_real_update_3(varptr + offset, scale, + srcptrs[0] + offset, facts[0], srcptrs[1] + offset, facts[1], + srcptrs[2] + offset, facts[2], ash[0]); + } break; default: // Loop over all grid points #pragma omp parallel for - for (ptrdiff_t i=0; i<npoints; ++i) { - CCTK_REAL tmp = scale * varptr[i]; - for (int n=0; n<nsrcs; ++n) { - tmp += facts[n] * srcptrs[n][i]; - } - varptr[i] = tmp; + for (i = 0; i < ash[2]; i++) { + for (j = 0; j < ash[0]; j++) { + int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i); + CCTK_REAL tmp = scale * varptr[idx]; + for (int n=0; n<nsrcs; ++n) { + tmp += facts[n] * srcptrs[n][idx]; + } + varptr[idx] = tmp; + } } break; } |