aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2016-03-24 10:14:18 +0100
committerAnton Khirnov <anton@khirnov.net>2016-03-24 10:14:18 +0100
commite00a0cc3de18a7bcd8e53761bed037ec0a1dabec (patch)
treeb1e332df8d7bcbe975eac1ffa051c71bc8447011
parentd9e0aa0a1a5e2b8026519940cddfc34713ba1234 (diff)
Cartoonize.
-rw-r--r--src/Operators.c115
1 files changed, 81 insertions, 34 deletions
diff --git a/src/Operators.c b/src/Operators.c
index fa78524..2fed466 100644
--- a/src/Operators.c
+++ b/src/Operators.c
@@ -1,5 +1,6 @@
#include "Operators.h"
#include <assert.h>
+#include <math.h>
#include <stddef.h>
#include <stdlib.h>
@@ -30,7 +31,6 @@ void
op_real_set_0(CCTK_REAL *restrict const varptr,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] = 0.0;
}
@@ -43,7 +43,6 @@ op_real_set_1(CCTK_REAL *restrict const varptr,
CCTK_REAL const fact0,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] = fact0 * srcptr0[i];
}
@@ -58,7 +57,6 @@ op_real_set_2(CCTK_REAL *restrict const varptr,
CCTK_REAL const fact1,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] = fact0 * srcptr0[i] + fact1 * srcptr1[i];
}
@@ -75,7 +73,6 @@ op_real_set_3(CCTK_REAL *restrict const varptr,
CCTK_REAL const fact2,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] = fact0 * srcptr0[i] + fact1 * srcptr1[i] + fact2 * srcptr2[i];
}
@@ -87,7 +84,6 @@ op_real_update_0(CCTK_REAL *restrict const varptr,
CCTK_REAL const scale,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] = scale * varptr[i];
}
@@ -101,7 +97,6 @@ op_real_update_1(CCTK_REAL *restrict const varptr,
CCTK_REAL const fact0,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] = scale * varptr[i] + fact0 * srcptr0[i];
}
@@ -117,7 +112,6 @@ op_real_update_2(CCTK_REAL *restrict const varptr,
CCTK_REAL const fact1,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] = scale * varptr[i] + fact0 * srcptr0[i] + fact1 * srcptr1[i];
}
@@ -135,7 +129,6 @@ op_real_update_3(CCTK_REAL *restrict const varptr,
CCTK_REAL const fact2,
ptrdiff_t const npoints)
{
-#pragma omp parallel for
for (ptrdiff_t i=0; i<npoints; ++i) {
varptr[i] =
scale * varptr[i] +
@@ -164,10 +157,25 @@ MoL_LinearCombination(cGH const *const cctkGH,
}
// Determine grid variable size
+ double *y;
+ int y_idx = -1, i, j;
int const dim = CCTK_GroupDimFromVarI(var);
int ash[dim];
int const ierr = CCTK_GroupashVI(cctkGH, dim, ash, var);
assert(!ierr);
+
+ y = CCTK_VarDataPtr(cctkGH, 0, "grid::y");
+ assert(y);
+
+ for (i = 0; i < ash[1]; i++) {
+ if (fabs(y[CCTK_GFINDEX3D(cctkGH, 0, i, 0)]) < 1e-8) {
+ y_idx = i;
+ break;
+ }
+ }
+ if (y_idx < 0)
+ assert(0);
+
// TODO: check that all src variables have the same ash
ptrdiff_t npoints = 1;
for (int d=0; d<dim; ++d) {
@@ -194,29 +202,48 @@ MoL_LinearCombination(cGH const *const cctkGH,
// performance
switch (nsrcs) {
case 0:
- op_real_set_0(varptr, npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_set_0(varptr + offset, ash[0]);
+ }
break;
case 1:
- op_real_set_1(varptr, srcptrs[0], facts[0], npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_set_1(varptr + offset, srcptrs[0] + offset, facts[0], ash[0]);
+ }
break;
case 2:
- op_real_set_2(varptr,
- srcptrs[0], facts[0], srcptrs[1], facts[1], npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_set_2(varptr + offset, srcptrs[0] + offset, facts[0],
+ srcptrs[1] + offset, facts[1], ash[0]);
+ }
break;
case 3:
- op_real_set_3(varptr,
- srcptrs[0], facts[0], srcptrs[1], facts[1],
- srcptrs[2], facts[2], npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_set_3(varptr + offset,
+ srcptrs[0] + offset, facts[0], srcptrs[1] + offset, facts[1],
+ srcptrs[2] + offset, facts[2], ash[0]);
+ }
break;
default:
// Loop over all grid points
#pragma omp parallel for
- for (ptrdiff_t i=0; i<npoints; ++i) {
- CCTK_REAL tmp = 0.0;
- for (int n=0; n<nsrcs; ++n) {
- tmp += facts[n] * srcptrs[n][i];
- }
- varptr[i] = tmp;
+ for (i = 0; i < ash[2]; i++) {
+ for (j = 0; j < ash[0]; j++) {
+ int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i);
+ CCTK_REAL tmp = 0.0;
+ for (int n=0; n<nsrcs; ++n) {
+ tmp += facts[n] * srcptrs[n][idx];
+ }
+ varptr[idx] = tmp;
+ }
}
break;
}
@@ -228,29 +255,49 @@ MoL_LinearCombination(cGH const *const cctkGH,
// performance
switch (nsrcs) {
case 0:
- op_real_update_0(varptr, scale, npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_update_0(varptr + offset, scale, ash[0]);
+ }
break;
case 1:
- op_real_update_1(varptr, scale, srcptrs[0], facts[0], npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_update_1(varptr + offset, scale, srcptrs[0] + offset, facts[0], ash[0]);
+ }
break;
case 2:
- op_real_update_2(varptr, scale,
- srcptrs[0], facts[0], srcptrs[1], facts[1], npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_update_2(varptr + offset, scale,
+ srcptrs[0] + offset, facts[0],
+ srcptrs[1] + offset, facts[1], ash[0]);
+ }
break;
case 3:
- op_real_update_3(varptr, scale,
- srcptrs[0], facts[0], srcptrs[1], facts[1],
- srcptrs[2], facts[2], npoints);
+#pragma omp parallel for
+ for (i = 0; i < ash[2]; i++) {
+ int offset = CCTK_GFINDEX3D(cctkGH, 0, y_idx, i);
+ op_real_update_3(varptr + offset, scale,
+ srcptrs[0] + offset, facts[0], srcptrs[1] + offset, facts[1],
+ srcptrs[2] + offset, facts[2], ash[0]);
+ }
break;
default:
// Loop over all grid points
#pragma omp parallel for
- for (ptrdiff_t i=0; i<npoints; ++i) {
- CCTK_REAL tmp = scale * varptr[i];
- for (int n=0; n<nsrcs; ++n) {
- tmp += facts[n] * srcptrs[n][i];
- }
- varptr[i] = tmp;
+ for (i = 0; i < ash[2]; i++) {
+ for (j = 0; j < ash[0]; j++) {
+ int idx = CCTK_GFINDEX3D(cctkGH, j, y_idx, i);
+ CCTK_REAL tmp = scale * varptr[idx];
+ for (int n=0; n<nsrcs; ++n) {
+ tmp += facts[n] * srcptrs[n][idx];
+ }
+ varptr[idx] = tmp;
+ }
}
break;
}