aboutsummaryrefslogtreecommitdiff
path: root/Carpet/LoopControl/src/loopcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'Carpet/LoopControl/src/loopcontrol.c')
-rw-r--r--Carpet/LoopControl/src/loopcontrol.c132
1 files changed, 119 insertions, 13 deletions
diff --git a/Carpet/LoopControl/src/loopcontrol.c b/Carpet/LoopControl/src/loopcontrol.c
index 83a09a313..b4e4bf328 100644
--- a/Carpet/LoopControl/src/loopcontrol.c
+++ b/Carpet/LoopControl/src/loopcontrol.c
@@ -898,12 +898,13 @@ lc_control_init (lc_control_t * restrict const lc,
lc->kkkstep = (lc->kkkmax - lc->kkkmin + lt->knthreads-1) / lt->knthreads;
/* Find location of current thread */
- lc->thread_num = omp_get_thread_num();
- int c = lc->thread_num;
- int const ci = c % lt->inthreads; c /= lt->inthreads;
- int const cj = c % lt->jnthreads; c /= lt->jnthreads;
- int const ck = c % lt->knthreads; c /= lt->knthreads;
- /* see below where c is continued to be used */
+ lc->thread_num = omp_get_thread_num();
+ int c_outer =
+ lc->thread_num / (lt->inithreads * lt->jnithreads * lt->knithreads);
+ int const ci = c_outer % lt->inthreads; c_outer /= lt->inthreads;
+ int const cj = c_outer % lt->jnthreads; c_outer /= lt->jnthreads;
+ int const ck = c_outer % lt->knthreads; c_outer /= lt->knthreads;
+ assert (c_outer == 0);
lc->iii = lc->iiimin + ci * lc->iiistep;
lc->jjj = lc->jjjmin + cj * lc->jjjstep;
lc->kkk = lc->kkkmin + ck * lc->kkkstep;
@@ -928,11 +929,12 @@ lc_control_init (lc_control_t * restrict const lc,
/*** Inner threads **********************************************************/
/* Inner loop thread parallelism */
- /* (No thread parallelism yet) */
- int const cii = c % lt->inithreads; c /= lt->inithreads;
- int const cjj = c % lt->jnithreads; c /= lt->jnithreads;
- int const ckk = c % lt->knithreads; c /= lt->knithreads;
- assert (c == 0);
+ int c_inner =
+ lc->thread_num % (lt->inithreads * lt->jnithreads * lt->knithreads);
+ int const cii = c_inner % lt->inithreads; c_inner /= lt->inithreads;
+ int const cjj = c_inner % lt->jnithreads; c_inner /= lt->jnithreads;
+ int const ckk = c_inner % lt->knithreads; c_inner /= lt->knithreads;
+ assert (c_inner == 0);
lc->iiii = cii;
lc->jjjj = cjj;
lc->kkkk = ckk;
@@ -951,6 +953,23 @@ lc_control_init (lc_control_t * restrict const lc,
/****************************************************************************/
+ /* Self test */
+ if (do_selftest) {
+ char * restrict mem;
+#pragma omp single copyprivate (mem)
+ {
+ mem =
+ calloc (lc->ilsh * lc->jlsh * lc->klsh, sizeof * lc->selftest_count);
+ }
+ lc->selftest_count = mem;
+ } else {
+ lc->selftest_count = NULL;
+ }
+
+
+
+ /****************************************************************************/
+
/* Timer */
lc->time_calc_begin = omp_get_wtime();
}
@@ -958,8 +977,26 @@ lc_control_init (lc_control_t * restrict const lc,
void
+lc_control_selftest (lc_control_t * restrict const lc,
+ int const imin, int const imax, int const j, int const k)
+{
+ assert (imin >= 0 && imax <= lc->ilsh);
+ assert (j >= 0 && j < lc->jlsh);
+ assert (k >= 0 && k < lc->klsh);
+ for (int i = imin; i < imax; ++i) {
+ int const ind3d = i + lc->ilsh * (j + lc->jlsh * k);
+#pragma omp atomic
+ ++ lc->selftest_count[ind3d];
+ }
+}
+
+
+
+void
lc_control_finish (lc_control_t * restrict const lc)
{
+ DECLARE_CCTK_PARAMETERS;
+
lc_stattime_t * restrict const lt = lc->stattime;
lc_statset_t * restrict const ls = lc->statset;
@@ -967,7 +1004,6 @@ lc_control_finish (lc_control_t * restrict const lc)
int first_iteration;
#pragma omp single copyprivate (ignore_iteration)
{
- DECLARE_CCTK_PARAMETERS;
ignore_iteration = ignore_initial_overhead && lt->time_count == 0.0;
first_iteration = lt->time_count_init == 0.0;
}
@@ -1021,7 +1057,6 @@ lc_control_finish (lc_control_t * restrict const lc)
/* #pragma omp barrier */
{
- DECLARE_CCTK_PARAMETERS;
if (use_simulated_annealing) {
#pragma omp single
{
@@ -1035,6 +1070,77 @@ lc_control_finish (lc_control_t * restrict const lc)
}
}
}
+
+ /* Perform self-check */
+
+ if (do_selftest) {
+ /* Ensure all threads have finished the loop */
+#pragma omp barrier
+ ;
+ /* Assert that exactly the specified points have been set */
+ static int failure = 0;
+#pragma omp for reduction(+: failure)
+ for (int k=0; k<lc->klsh; ++k) {
+ for (int j=0; j<lc->jlsh; ++j) {
+ for (int i=0; i<lc->ilsh; ++i) {
+ int const ind3d = i + lc->ilsh * (j + lc->jlsh * k);
+ char const inside =
+ (i >= lc->imin && i < lc->imax) &&
+ (j >= lc->jmin && j < lc->jmax) &&
+ (k >= lc->kmin && k < lc->kmax);
+ if (lc->selftest_count[ind3d] != inside) {
+ ++ failure;
+#pragma omp critical
+ {
+ fprintf (stderr, " i=[%d,%d,%d] count=%d expected=%d\n",
+ i, j, k,
+ (int) lc->selftest_count[ind3d], (int) inside);
+ }
+ }
+ }
+ }
+ }
+ if (failure) {
+ for (int n=0; n<omp_get_num_threads(); ++n) {
+ if (n == omp_get_thread_num()) {
+ fprintf (stderr, "Thread: %d\n", n);
+ fprintf (stderr, " Arguments:\n");
+ fprintf (stderr, " imin=[%d,%d,%d]\n", lc->imin, lc->jmin, lc->kmin);
+ fprintf (stderr, " imax=[%d,%d,%d]\n", lc->imax, lc->jmax, lc->kmax);
+ fprintf (stderr, " ilsh=[%d,%d,%d]\n", lc->ilsh, lc->jlsh, lc->klsh);
+ fprintf (stderr, " di=%d\n", lc->di);
+ fprintf (stderr, " Thread parallellism:\n");
+ fprintf (stderr, " iiimin =[%d,%d,%d]\n", lc->iiimin, lc->jjjmin, lc->kkkmin);
+ fprintf (stderr, " iiimax =[%d,%d,%d]\n", lc->iiimax, lc->jjjmax, lc->kkkmax);
+ fprintf (stderr, " iiistep=[%d,%d,%d]\n", lc->iiistep, lc->jjjstep, lc->kkkstep);
+ fprintf (stderr, " Current thread:\n");
+ fprintf (stderr, " thread_num=%d\n", lc->thread_num);
+ fprintf (stderr, " iii =[%d,%d,%d]\n", lc->iii, lc->jjj, lc->kkk);
+ fprintf (stderr, " iiii =[%d,%d,%d]\n", lc->iiii, lc->jjjj, lc->kkkk);
+ fprintf (stderr, " Tiling loop:\n");
+ fprintf (stderr, " iimin =[%d,%d,%d]\n", lc->iimin, lc->jjmin, lc->kkmin);
+ fprintf (stderr, " iimax =[%d,%d,%d]\n", lc->iimax, lc->jjmax, lc->kkmax);
+ fprintf (stderr, " iistep=[%d,%d,%d]\n", lc->iistep, lc->jjstep, lc->kkstep);
+ fprintf (stderr, " Inner thread parallelism:\n");
+ fprintf (stderr, " iiiimin =[%d,%d,%d]\n", lc->iiiimin, lc->jjjjmin, lc->kkkkmin);
+ fprintf (stderr, " iiiimax =[%d,%d,%d]\n", lc->iiiimax, lc->jjjjmax, lc->kkkkmax);
+ fprintf (stderr, " iiiistep=[%d,%d,%d]\n", lc->iiiistep, lc->jjjjstep, lc->kkkkstep);
+ }
+#pragma omp barrier
+ ;
+ }
+#pragma omp critical
+ {
+ CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "LoopControl loop \"%s\" self-test failed",
+ lc->statmap->name);
+ }
+ }
+#pragma omp single nowait
+ {
+ free (lc->selftest_count);
+ }
+ }
}