aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author eschnett <eschnett@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> 2011-05-09 00:15:54 +0000
committer eschnett <eschnett@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> 2011-05-09 00:15:54 +0000
commit ff98db7a7345b1c59e435b19b6df9a127c011fdb (patch)
tree 38a77d011a9060de9b283c7956fcbcc7ddeab38c
parent ac8adc4833daa18913ee765132345ad1c1a228b9 (diff)
Reorganise some of the internals of thorn Slab:
Use LoopControl to parallelise loops via OpenMP. Refactor the "work horse" routines that perform the actual copying. These routines are specialised for common cases that need to execute efficiently, in particular for the cases encountered in RotatingSymmetry90 and RotatingSymmetry180 when handling CCTK_REAL variables. Offer an additional API (Slab_MultiTransfer_Init, Slab_MultiTransfer_Apply, Slab_MultiTransfer_Finalize) that calculates the communication schedule only once, and then re-uses it in further calls. This avoids some communication overhead. Remove old CVS header comments. git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@77 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8
-rw-r--r-- README 2
-rw-r--r-- configuration.ccl 5
-rw-r--r-- doc/documentation.tex 2
-rw-r--r-- interface.ccl 3
-rw-r--r-- param.ccl 1
-rw-r--r-- schedule.ccl 1
-rw-r--r-- src/make.code.defn 1
-rw-r--r-- src/slab.cc 784
-rw-r--r-- src/slab.h 64
-rw-r--r-- src/slab.inc 1
10 files changed, 550 insertions, 314 deletions
diff --git a/README b/README
index 8faf88b..9ba6886 100644
--- a/README
+++ b/README
@@ -1,5 +1,3 @@
-CVS info : $Header$
-
Cactus Code Thorn Slab
Thorn Author(s) : Erik Schnetter <schnetter@cct.lsu.edu>
Thorn Maintainer(s) : Erik Schnetter <schnetter@cct.lsu.edu>
diff --git a/configuration.ccl b/configuration.ccl
index c56ea13..9e327e2 100644
--- a/configuration.ccl
+++ b/configuration.ccl
@@ -1,6 +1,9 @@
# Configuration definition for thorn Slab
-# $Header$
PROVIDES Slab
{
}
+
+OPTIONAL LoopControl
+{
+}
diff --git a/doc/documentation.tex b/doc/documentation.tex
index b76d9f4..ebcd415 100644
--- a/doc/documentation.tex
+++ b/doc/documentation.tex
@@ -2,7 +2,6 @@
% Cactus Thorn template for ThornGuide documentation
% Author: Ian Kelley
% Date: Sun Jun 02, 2002
-% $Header$
%
% Thorn documentation in the latex file doc/documentation.tex
% will be included in ThornGuides built with the Cactus make system.
@@ -64,7 +63,6 @@
% *======================================================================*
% If you are using CVS use this line to give version information
-% $Header$
\documentclass{article}
diff --git a/interface.ccl b/interface.ccl
index 7110914..4a6810d 100644
--- a/interface.ccl
+++ b/interface.ccl
@@ -1,11 +1,12 @@
# Interface definition for thorn Slab
-# $Header$
IMPLEMENTS: Slab
INCLUDES HEADER: slab.h IN Slab.h
INCLUDES HEADER: slab.inc IN Slab.inc
+USES INCLUDE HEADER: loopcontrol.h
+
CCTK_POINTER_TO_CONST \
FUNCTION GetMPICommWorld (CCTK_POINTER_TO_CONST IN cctkGH)
USES FUNCTION GetMPICommWorld
diff --git a/param.ccl b/param.ccl
index e3d2ae2..4dbd3c8 100644
--- a/param.ccl
+++ b/param.ccl
@@ -1,5 +1,4 @@
# Parameter definitions for thorn Slab
-# $Header$
BOOLEAN timer_output "Print slabbing timings at shutdown time" STEERABLE=always
{
diff --git a/schedule.ccl b/schedule.ccl
index 74dca51..5958b4f 100644
--- a/schedule.ccl
+++ b/schedule.ccl
@@ -1,5 +1,4 @@
# Schedule definitions for thorn Slab
-# $Header$
SCHEDULE Slab_InitMPIDatatypes AT startup after Driver_Startup
{
diff --git a/src/make.code.defn b/src/make.code.defn
index 5c35cf2..612e3ed 100644
--- a/src/make.code.defn
+++ b/src/make.code.defn
@@ -1,5 +1,4 @@
# Main make.code.defn file for thorn Slab
-# $Header$
# Source files in this directory
SRCS = slab.cc
diff --git a/src/slab.cc b/src/slab.cc
index 3af399a..c032af8 100644
--- a/src/slab.cc
+++ b/src/slab.cc
@@ -37,6 +37,8 @@
#include "util_ErrorCodes.h"
#include "util_Table.h"
+#include "loopcontrol.h"
+
#ifdef CCTK_MPI
# include <mpi.h>
# define HAVE_MPI 1
@@ -75,10 +77,21 @@ using namespace std;
-static int timer_init = -1;
-static int timer_copy_in = -1;
-static int timer_xfer = -1;
-static int timer_copy_back = -1;
+static int timer_init = -1;
+static int timer_apply = -1;
+static int timer_copy_in = -1;
+static int timer_copy_in_noxpose = -1;
+static int timer_copy_in_xposexy = -1;
+static int timer_copy_in_xposegeneral = -1;
+static int timer_copy_in_general = -1;
+static int timer_xfer = -1;
+static int timer_copy_back = -1;
+static int timer_copy_back_noflip = -1;
+static int timer_copy_back_flipx = -1;
+static int timer_copy_back_flipy = -1;
+static int timer_copy_back_flipxy = -1;
+static int timer_copy_back_flipgeneral = -1;
+static int timer_copy_back_general = -1;
@@ -87,20 +100,42 @@ void
Slab_InitTimers (CCTK_ARGUMENTS)
{
DECLARE_CCTK_ARGUMENTS;
- timer_init = CCTK_TimerCreate ("Slab/init");
- timer_copy_in = CCTK_TimerCreate ("Slab/copy in");
- timer_xfer = CCTK_TimerCreate ("Slab/xfer");
- timer_copy_back = CCTK_TimerCreate ("Slab/copy back");
+ timer_init = CCTK_TimerCreate ("Slab/init" );
+ timer_apply = CCTK_TimerCreate ("Slab/apply" );
+ timer_copy_in = CCTK_TimerCreate ("Slab/copy_in" );
+ timer_copy_in_noxpose = CCTK_TimerCreate ("Slab/copy_in_noxpose" );
+ timer_copy_in_xposexy = CCTK_TimerCreate ("Slab/copy_in_xposexy" );
+ timer_copy_in_xposegeneral = CCTK_TimerCreate ("Slab/copy_in_xposegeneral" );
+ timer_copy_in_general = CCTK_TimerCreate ("Slab/copy_in_general" );
+ timer_xfer = CCTK_TimerCreate ("Slab/xfer" );
+ timer_copy_back = CCTK_TimerCreate ("Slab/copy_back" );
+ timer_copy_back_noflip = CCTK_TimerCreate ("Slab/copy_back_noflip" );
+ timer_copy_back_flipx = CCTK_TimerCreate ("Slab/copy_back_flipx" );
+ timer_copy_back_flipy = CCTK_TimerCreate ("Slab/copy_back_flipy" );
+ timer_copy_back_flipxy = CCTK_TimerCreate ("Slab/copy_back_flipxy" );
+ timer_copy_back_flipgeneral = CCTK_TimerCreate ("Slab/copy_back_flipgeneral");
+ timer_copy_back_general = CCTK_TimerCreate ("Slab/copy_back_general" );
}
extern "C"
int
Slab_PrintTimers ()
{
- CCTK_TimerPrintDataI (timer_init , -1);
- CCTK_TimerPrintDataI (timer_copy_in , -1);
- CCTK_TimerPrintDataI (timer_xfer , -1);
- CCTK_TimerPrintDataI (timer_copy_back, -1);
+ CCTK_TimerPrintDataI (timer_init , -1);
+ CCTK_TimerPrintDataI (timer_apply , -1);
+ CCTK_TimerPrintDataI (timer_copy_in , -1);
+ CCTK_TimerPrintDataI (timer_copy_in_noxpose , -1);
+ CCTK_TimerPrintDataI (timer_copy_in_xposexy , -1);
+ CCTK_TimerPrintDataI (timer_copy_in_xposegeneral , -1);
+ CCTK_TimerPrintDataI (timer_copy_in_general , -1);
+ CCTK_TimerPrintDataI (timer_xfer , -1);
+ CCTK_TimerPrintDataI (timer_copy_back , -1);
+ CCTK_TimerPrintDataI (timer_copy_back_noflip , -1);
+ CCTK_TimerPrintDataI (timer_copy_back_flipx , -1);
+ CCTK_TimerPrintDataI (timer_copy_back_flipy , -1);
+ CCTK_TimerPrintDataI (timer_copy_back_flipxy , -1);
+ CCTK_TimerPrintDataI (timer_copy_back_flipgeneral, -1);
+ CCTK_TimerPrintDataI (timer_copy_back_general , -1);
return 0;
}
@@ -375,16 +410,25 @@ Slab_InitMPIDatatypes ()
{
#ifdef CCTK_MPI
# ifdef HAVE_CCTK_REAL4
+ assert (CACTUS_MPI_REAL4 != MPI_DATATYPE_NULL);
MPI_Type_contiguous (2, CACTUS_MPI_REAL4, &CACTUS_MPI_COMPLEX8);
MPI_Type_commit (&CACTUS_MPI_COMPLEX8);
# endif
# ifdef HAVE_CCTK_REAL8
+ assert (CACTUS_MPI_REAL8 != MPI_DATATYPE_NULL);
MPI_Type_contiguous (2, CACTUS_MPI_REAL8, &CACTUS_MPI_COMPLEX16);
MPI_Type_commit (&CACTUS_MPI_COMPLEX16);
# endif
# ifdef HAVE_CCTK_REAL16
- MPI_Type_contiguous (2, CACTUS_MPI_REAL16, &CACTUS_MPI_COMPLEX32);
- MPI_Type_commit (&CACTUS_MPI_COMPLEX32);
+ if (CACTUS_MPI_REAL16 != MPI_DATATYPE_NULL) {
+ MPI_Type_contiguous (2, CACTUS_MPI_REAL16, &CACTUS_MPI_COMPLEX32);
+ MPI_Type_commit (&CACTUS_MPI_COMPLEX32);
+ } else {
+ // CCTK_REAL16 is not supported by MPI
+ CCTK_WARN (CCTK_WARN_ALERT,
+ "CCTK_REAL16 support is enabled in Cactus, but is not supported by MPI. All MPI operations with this datatype will fail.");
+ CACTUS_MPI_COMPLEX32 = MPI_DATATYPE_NULL;
+ }
# endif
#endif
@@ -708,8 +752,10 @@ print_xferinfo (FILE * const out,
fprintf (out, " flip: %d\n", xferinfo->flip);
}
-// workhorse routine responsible for the actual copying/transposing of data
-template<typename T> inline void
+// workhorse routines for the actual copying, transposing, and flipping
+// of data
+template<typename T>
+inline void
copy_data (const vector<xfer> &info,
const vector<bbox> &srcdetail,
const vector<int> &srcoffset,
@@ -719,9 +765,9 @@ copy_data (const vector<xfer> &info,
const int n,
const vector<int> &varis,
const int nvaris,
- const int xpose_x=0,
- const int xpose_y=1,
- const int xpose_z=2)
+ const int xpose_x,
+ const int xpose_y,
+ const int xpose_z)
{
assert (srcptrs);
@@ -740,11 +786,11 @@ copy_data (const vector<xfer> &info,
int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+0].len;
int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len;
int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len;
-
+
int const dstdetailleni = srcdetail[n*SLAB_MAXDIM+xpose_x].len;
int const dstdetaillenj = srcdetail[n*SLAB_MAXDIM+xpose_y].len;
- //int const dstdetaillenk = srcdetail[n*SLAB_MAXDIM+xpose_z].len; unused
-
+ int const dstdetaillenk = srcdetail[n*SLAB_MAXDIM+xpose_z].len;
+
if (n==0) assert (srcoffset[n]==0);
// TODO: This does not take nvaris into account
// if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk);
@@ -757,35 +803,91 @@ copy_data (const vector<xfer> &info,
}
}
}
-
+
+ assert (dstdetailleni*dstdetaillenj*dstdetaillenk == srcelems[n]);
+ if (srcelems[n] == 0) return;
+
for (int vari=0; vari<nvaris; ++vari) {
T * restrict const srcdataptr =
(T *)&srcdata.front() + srcoffset[n] + vari * srcelems[n];
- T const * restrict const srcptr =
- (T const *)srcptrs[varis[vari]];
+ T const * restrict const srcptr = (T const *)srcptrs[varis[vari]];
assert(srcptr);
-# pragma omp parallel for
- for (int k = 0; k < srcdetaillenk; ++k) {
- for (int j = 0; j < srcdetaillenj; ++j) {
- for (int i = 0; i < srcdetailleni; ++i) {
- int ipos[SLAB_MAXDIM];
- ipos[0] = i;
- ipos[1] = j;
- ipos[2] = k;
- int const srcindi = srcdetailoffi + ipos[0] - srcoffi;
- int const srcindj = srcdetailoffj + ipos[1] - srcoffj;
- int const srcindk = srcdetailoffk + ipos[2] - srcoffk;
- ifcheck assert (srcindi>=0 and srcindi<srcleni);
- ifcheck assert (srcindj>=0 and srcindj<srclenj);
- ifcheck assert (srcindk>=0 and srcindk<srclenk);
- size_t const srcind =
- srcindi + srcleni * (srcindj + srclenj * srcindk);
- size_t const bufind =
- ipos[xpose_x] + dstdetailleni * (ipos[xpose_y] + dstdetaillenj * ipos[xpose_z]);
- srcdataptr[bufind] = srcptr[srcind];
- }
- }
+ if (xpose_x==0 and xpose_y==1 and xpose_z==2) {
+ // no transposition
+
+ CCTK_TimerStartI (timer_copy_in_noxpose);
+# pragma omp parallel
+ CCTK_LOOP3(Slab_copy_in_noxpose, i,j,k,
+ 0,0,0, srcdetailleni,srcdetaillenj,srcdetaillenk,
+ srcleni,srcleni*srclenj,srcleni*srclenj*srclenk)
+ {
+ int const srcindi = srcdetailoffi + i - srcoffi;
+ int const srcindj = srcdetailoffj + j - srcoffj;
+ int const srcindk = srcdetailoffk + k - srcoffk;
+ ifcheck assert (srcindi>=0 and srcindi<srcleni);
+ ifcheck assert (srcindj>=0 and srcindj<srclenj);
+ ifcheck assert (srcindk>=0 and srcindk<srclenk);
+ size_t const srcind =
+ srcindi + srcleni * (srcindj + srclenj * srcindk);
+ size_t const bufind =
+ i + dstdetailleni * (j + dstdetaillenj * k);
+ srcdataptr[bufind] = srcptr[srcind];
+ } CCTK_ENDLOOP3(Slab_copy_in_noxpose);
+ CCTK_TimerStopI (timer_copy_in_noxpose);
+
+ } else if (xpose_x==1 and xpose_y==0 and xpose_z==2) {
+ // transpose x and y
+
+ CCTK_TimerStartI (timer_copy_in_xposexy);
+# pragma omp parallel
+ // Interchange i and j loops
+ CCTK_LOOP3(Slab_copy_in_xposexy, j,i,k,
+ 0,0,0, srcdetaillenj,srcdetailleni,srcdetaillenk,
+ srcleni,srcleni*srclenj,srcleni*srclenj*srclenk)
+ {
+ int const srcindi = srcdetailoffi + i - srcoffi;
+ int const srcindj = srcdetailoffj + j - srcoffj;
+ int const srcindk = srcdetailoffk + k - srcoffk;
+ ifcheck assert (srcindi>=0 and srcindi<srcleni);
+ ifcheck assert (srcindj>=0 and srcindj<srclenj);
+ ifcheck assert (srcindk>=0 and srcindk<srclenk);
+ size_t const srcind =
+ srcindi + srcleni * (srcindj + srclenj * srcindk);
+ size_t const bufind =
+ j + dstdetailleni * (i + dstdetaillenj * k);
+ srcdataptr[bufind] = srcptr[srcind];
+ } CCTK_ENDLOOP3(Slab_copy_in_xposexy);
+ CCTK_TimerStopI (timer_copy_in_xposexy);
+
+ } else {
+ // general transposition
+
+ CCTK_TimerStartI (timer_copy_in_xposegeneral);
+# pragma omp parallel
+ CCTK_LOOP3(Slab_copy_in_xposegeneral, i,j,k,
+ 0,0,0, srcdetailleni,srcdetaillenj,srcdetaillenk,
+ srcleni,srcleni*srclenj,srcleni*srclenj*srclenk)
+ {
+ int ipos[SLAB_MAXDIM];
+ ipos[0] = i;
+ ipos[1] = j;
+ ipos[2] = k;
+ int const srcindi = srcdetailoffi + i - srcoffi;
+ int const srcindj = srcdetailoffj + j - srcoffj;
+ int const srcindk = srcdetailoffk + k - srcoffk;
+ ifcheck assert (srcindi>=0 and srcindi<srcleni);
+ ifcheck assert (srcindj>=0 and srcindj<srclenj);
+ ifcheck assert (srcindk>=0 and srcindk<srclenk);
+ size_t const srcind =
+ srcindi + srcleni * (srcindj + srclenj * srcindk);
+ size_t const bufind =
+ (ipos[xpose_x] + dstdetailleni *
+ (ipos[xpose_y] + dstdetaillenj * ipos[xpose_z]));
+ srcdataptr[bufind] = srcptr[srcind];
+ } CCTK_ENDLOOP3(Slab_copy_in_xposegeneral);
+ CCTK_TimerStopI (timer_copy_in_xposegeneral);
+
}
} // for vari
@@ -802,9 +904,9 @@ copy_data_back (const vector<xfer> &info,
const int n,
const vector<int> &varis,
const int nvaris,
- const bool flip_x=false,
- const bool flip_y=false,
- const bool flip_z=false)
+ const bool flip_x,
+ const bool flip_y,
+ const bool flip_z)
{
assert (dstptrs);
@@ -824,46 +926,157 @@ copy_data_back (const vector<xfer> &info,
int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len;
int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len;
+ assert (dstdetailleni*dstdetaillenj*dstdetaillenk == dstelems[n]);
+ if (dstelems[n] == 0) return;
+
for (int vari=0; vari<nvaris; ++vari) {
T * restrict const dstptr = (T *)dstptrs[varis[vari]];
assert (dstptr);
T const * restrict const dstdataptr =
- (T const *)&dstdata.front() +
- dstoffset[n] + vari * dstelems[n];
+ (T const *)&dstdata.front() + dstoffset[n] + vari * dstelems[n];
-# pragma omp parallel for
- for (int k = 0; k < dstdetaillenk; ++k) {
- for (int j = 0; j < dstdetaillenj; ++j) {
- for (int i = 0; i < dstdetailleni; ++i) {
- int const dstindi = dstdetailoffi + (flip_x ? dstdetailleni - 1 - i : i) - dstoffi;
- int const dstindj = dstdetailoffj + (flip_y ? dstdetaillenj - 1 - j : j) - dstoffj;
- int const dstindk = dstdetailoffk + (flip_z ? dstdetaillenk - 1 - k : k) - dstoffk;
- ifcheck assert (dstindi>=0 and dstindi<dstleni);
- ifcheck assert (dstindj>=0 and dstindj<dstlenj);
- ifcheck assert (dstindk>=0 and dstindk<dstlenk);
- size_t const dstind =
- dstindi + dstleni * (dstindj + dstlenj * dstindk);
- size_t const bufind =
- i + dstdetailleni * (j + dstdetaillenj * k);
- dstptr[dstind] = dstdataptr[bufind];
- }
- }
+ if (not flip_x and not flip_y and not flip_z) {
+ // no flipping
+
+ CCTK_TimerStartI (timer_copy_back_noflip);
+# pragma omp parallel
+ CCTK_LOOP3(Slab_copy_back_noflip, i,j,k,
+ 0,0,0, dstdetailleni,dstdetaillenj,dstdetaillenk,
+ dstleni,dstleni*dstlenj,dstleni*dstlenj*dstlenk)
+ {
+ int const dstindi = dstdetailoffi + i - dstoffi;
+ int const dstindj = dstdetailoffj + j - dstoffj;
+ int const dstindk = dstdetailoffk + k - dstoffk;
+ ifcheck assert (dstindi>=0 and dstindi<dstleni);
+ ifcheck assert (dstindj>=0 and dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 and dstindk<dstlenk);
+ size_t const dstind =
+ dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ size_t const bufind =
+ i + dstdetailleni * (j + dstdetaillenj * k);
+ dstptr[dstind] = dstdataptr[bufind];
+ } CCTK_ENDLOOP3(Slab_copy_back_noflip);
+ CCTK_TimerStopI (timer_copy_back_noflip); // pairs with the CCTK_TimerStartI issued before the loop
+
+ } else if (flip_x and not flip_y and not flip_z) {
+ // flip in x direction
+
+ CCTK_TimerStartI (timer_copy_back_flipx);
+# pragma omp parallel
+ CCTK_LOOP3(Slab_copy_back_flipx, i,j,k,
+ 0,0,0, dstdetailleni,dstdetaillenj,dstdetaillenk,
+ dstleni,dstleni*dstlenj,dstleni*dstlenj*dstlenk)
+ {
+ int const dstindi = dstdetailoffi + (dstdetailleni - 1 - i) - dstoffi;
+ int const dstindj = dstdetailoffj + j - dstoffj;
+ int const dstindk = dstdetailoffk + k - dstoffk;
+ ifcheck assert (dstindi>=0 and dstindi<dstleni);
+ ifcheck assert (dstindj>=0 and dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 and dstindk<dstlenk);
+ size_t const dstind =
+ dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ size_t const bufind =
+ i + dstdetailleni * (j + dstdetaillenj * k);
+ dstptr[dstind] = dstdataptr[bufind];
+ } CCTK_ENDLOOP3(Slab_copy_back_flipx);
+ CCTK_TimerStopI (timer_copy_back_flipx);
+
+ } else if (not flip_x and flip_y and not flip_z) {
+ // flip in y direction
+
+ CCTK_TimerStartI (timer_copy_back_flipy);
+# pragma omp parallel
+ CCTK_LOOP3(Slab_copy_back_flipy, i,j,k,
+ 0,0,0, dstdetailleni,dstdetaillenj,dstdetaillenk,
+ dstleni,dstleni*dstlenj,dstleni*dstlenj*dstlenk)
+ {
+ int const dstindi = dstdetailoffi + i - dstoffi;
+ int const dstindj = dstdetailoffj + (dstdetaillenj - 1 - j) - dstoffj;
+ int const dstindk = dstdetailoffk + k - dstoffk;
+ ifcheck assert (dstindi>=0 and dstindi<dstleni);
+ ifcheck assert (dstindj>=0 and dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 and dstindk<dstlenk);
+ size_t const dstind =
+ dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ size_t const bufind =
+ i + dstdetailleni * (j + dstdetaillenj * k);
+ dstptr[dstind] = dstdataptr[bufind];
+ } CCTK_ENDLOOP3(Slab_copy_back_flipy);
+ CCTK_TimerStopI (timer_copy_back_flipy);
+
+ } else if (flip_x and flip_y and not flip_z) {
+ // flip in both x and y direction
+
+ CCTK_TimerStartI (timer_copy_back_flipxy);
+# pragma omp parallel
+ CCTK_LOOP3(Slab_copy_back_flipxy, i,j,k,
+ 0,0,0, dstdetailleni,dstdetaillenj,dstdetaillenk,
+ dstleni,dstleni*dstlenj,dstleni*dstlenj*dstlenk)
+ {
+ int const dstindi = dstdetailoffi + (dstdetailleni - 1 - i) - dstoffi;
+ int const dstindj = dstdetailoffj + (dstdetaillenj - 1 - j) - dstoffj;
+ int const dstindk = dstdetailoffk + k - dstoffk;
+ ifcheck assert (dstindi>=0 and dstindi<dstleni);
+ ifcheck assert (dstindj>=0 and dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 and dstindk<dstlenk);
+ size_t const dstind =
+ dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ size_t const bufind =
+ i + dstdetailleni * (j + dstdetaillenj * k);
+ dstptr[dstind] = dstdataptr[bufind];
+ } CCTK_ENDLOOP3(Slab_copy_back_flipxy);
+ CCTK_TimerStopI (timer_copy_back_flipxy);
+
+ } else {
+ // general flipping
+
+ CCTK_TimerStartI (timer_copy_back_flipgeneral);
+# pragma omp parallel
+ CCTK_LOOP3(Slab_copy_back_flipgeneral, i,j,k,
+ 0,0,0, dstdetailleni,dstdetaillenj,dstdetaillenk,
+ dstleni,dstleni*dstlenj,dstleni*dstlenj*dstlenk)
+ {
+ int const dstindi =
+ dstdetailoffi + (flip_x ? dstdetailleni - 1 - i : i) - dstoffi;
+ int const dstindj =
+ dstdetailoffj + (flip_y ? dstdetaillenj - 1 - j : j) - dstoffj;
+ int const dstindk =
+ dstdetailoffk + (flip_z ? dstdetaillenk - 1 - k : k) - dstoffk;
+ ifcheck assert (dstindi>=0 and dstindi<dstleni);
+ ifcheck assert (dstindj>=0 and dstindj<dstlenj);
+ ifcheck assert (dstindk>=0 and dstindk<dstlenk);
+ size_t const dstind =
+ dstindi + dstleni * (dstindj + dstlenj * dstindk);
+ size_t const bufind =
+ i + dstdetailleni * (j + dstdetaillenj * k);
+ dstptr[dstind] = dstdataptr[bufind];
+ } CCTK_ENDLOOP3(Slab_copy_back_flipgeneral);
+ CCTK_TimerStopI (timer_copy_back_flipgeneral);
+
}
} // for vari
}
+
+
+struct slabsetup {
+ MPI_Comm comm;
+ vector<xfer> info;
+ vector<xfer> allinfo;
+ vector<bbox> srcdetail, dstdetail;
+ size_t srclentot, dstlentot;
+};
+
+
+
extern "C"
-int
-Slab_MultiTransfer (cGH const * restrict const cctkGH,
- int const dim,
- xferinfo const * restrict const xferinfo,
- int const options,
- int const nvars,
- int const * restrict const srctypes,
- void const * restrict const * restrict const srcptrs,
- int const * restrict const dsttypes,
- void * restrict const * restrict const dstptrs)
+slabsetup *
+Slab_MultiTransfer_Init
+(cGH const* restrict const cctkGH,
+ int const dim,
+ xferinfo const* restrict const xferinfo,
+ int const options)
{
DECLARE_CCTK_PARAMETERS;
@@ -871,20 +1084,15 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
check (cctkGH);
check (dim >= 0);
check (xferinfo);
- check (nvars >= 0);
- check (nvars==0 or srctypes);
- for (int var=0; var<nvars; ++var) check (srctypes[var] >= 0);
- check (nvars==0 or srcptrs);
- // for (int var=0; var<nvars; ++var) check (srcptrs[var]);
- check (nvars==0 or dsttypes);
- for (int var=0; var<nvars; ++var) check (dsttypes[var] >= 0);
- check (nvars==0 or dstptrs);
- // for (int var=0; var<nvars; ++var) check (dstptrs[var]);
- if (nvars==0) return 0;
+
CCTK_TimerStartI (timer_init);
+ slabsetup * CCTK_RESTRICT const slabsetup = new struct slabsetup;
+
+
+
bool useghosts;
{
CCTK_INT tmp;
@@ -904,7 +1112,8 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
}
check (dim <= SLAB_MAXDIM);
- vector<xfer> info (SLAB_MAXDIM);
+ vector<xfer>& info = slabsetup->info;
+ info.resize (SLAB_MAXDIM);
for (int d=0; d<dim; ++d) {
global2bbox (&xferinfo[d].src, &info[d].src.global);
local2bbox (&xferinfo[d].src, &info[d].src.local);
@@ -1004,20 +1213,18 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
}
}
- size_t srclentot = 1;
- size_t dstlentot = 1;
+ size_t& srclentot = slabsetup->srclentot;
+ size_t& dstlentot = slabsetup->dstlentot;
+ srclentot = 1;
+ dstlentot = 1;
for (int d=0; d<SLAB_MAXDIM; ++d) {
srclentot *= info[d].src.local.len;
dstlentot *= info[d].dst.local.len;
}
- // Check arguments (continued)
- for (int var=0; var<nvars; ++var) if (srclentot > 0) assert (srcptrs[var]);
- for (int var=0; var<nvars; ++var) if (dstlentot > 0) assert (dstptrs[var]);
-
- MPI_Comm comm;
+ MPI_Comm& comm = slabsetup->comm;
{
CCTK_POINTER_TO_CONST tmp1;
int const iret1 = Util_TableGetPointerToConst (options, &tmp1, "comm");
@@ -1025,7 +1232,7 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
// There was an entry, use it
comm = * (MPI_Comm const *) tmp1;
} else if (iret1 == UTIL_ERROR_TABLE_WRONG_DATA_TYPE) {
- // Entry has wront type, fall back
+ // Entry has wrong type, fall back
CCTK_POINTER tmp2;
int const iret2 = Util_TableGetPointer (options, &tmp2, "comm");
if (iret2 == 1) {
@@ -1066,7 +1273,8 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
- vector<xfer> allinfo (size * SLAB_MAXDIM);
+ vector<xfer>& allinfo = slabsetup->allinfo;
+ allinfo.resize (size * SLAB_MAXDIM);
{
int const info_nints = sizeof(xfer) / sizeof(int);
ifdebug fflush (stdout);
@@ -1119,7 +1327,8 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
- vector<bbox> srcdetail (size * SLAB_MAXDIM);
+ vector<bbox>& srcdetail = slabsetup->srcdetail;
+ srcdetail.resize (size * SLAB_MAXDIM);
for (int n = 0; n < size; ++n) {
ifdebug printf ("srcdetail n=%d:\n", n);
for (int d=0; d<SLAB_MAXDIM; ++d) {
@@ -1158,13 +1367,10 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
}
}
- vector<int> srcelems (size);
- vector<int> srccount (size);
- vector<int> srcoffset (size + 1);
-
- vector<bbox> dstdetail (size * SLAB_MAXDIM);
+ vector<bbox>& dstdetail = slabsetup->dstdetail;
+ dstdetail.resize (size * SLAB_MAXDIM);
for (int n = 0; n < size; ++n) {
ifdebug printf ("dstdetail n=%d:\n", n);
for (int d=0; d<SLAB_MAXDIM; ++d) {
@@ -1209,12 +1415,86 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
}
}
+
+
+ CCTK_TimerStopI (timer_init);
+
+ return slabsetup;
+}
+
+
+
+extern "C"
+int
+Slab_MultiTransfer_Apply
+(cGH const * restrict const cctkGH,
+ slabsetup const * restrict const slabsetup,
+ int const nvars,
+ int const * restrict const srctypes,
+ void const * restrict const * restrict const srcptrs,
+ int const * restrict const dsttypes,
+ void * restrict const * restrict const dstptrs)
+{
+ DECLARE_CCTK_PARAMETERS;
+
+ // Check arguments
+ check (cctkGH);
+ check (slabsetup);
+ check (nvars >= 0);
+ check (nvars==0 or srctypes);
+ for (int var=0; var<nvars; ++var) check (srctypes[var] >= 0);
+ check (nvars==0 or srcptrs);
+ size_t const& srclentot = slabsetup->srclentot;
+ for (int var=0; var<nvars; ++var) if (srclentot > 0) assert (srcptrs[var]);
+ check (nvars==0 or dsttypes);
+ for (int var=0; var<nvars; ++var) check (dsttypes[var] >= 0);
+ check (nvars==0 or dstptrs);
+ size_t const& dstlentot = slabsetup->dstlentot;
+ for (int var=0; var<nvars; ++var) if (dstlentot > 0) assert (dstptrs[var]);
+
+ if (nvars==0) return 0;
+
+
+
+ CCTK_TimerStartI (timer_apply);
+
+
+
+ MPI_Comm const& comm = slabsetup->comm;
+
+ ifcheck {
+ ifdebug fflush (stdout);
+ MPI_Barrier (comm);
+ }
+
+ int size, rank;
+ MPI_Comm_size (comm, &size);
+ MPI_Comm_rank (comm, &rank);
+
+ ifcheck {
+ static int count = 424242;
+ int mycount = count;
+ ifdebug fflush (stdout);
+ MPI_Bcast (&mycount, 1, MPI_INT, 0, comm);
+ assert (mycount == count);
+ ++ count;
+ }
+
+
+
+ vector<xfer> const& info = slabsetup->info;
+ vector<xfer> const& allinfo = slabsetup->allinfo;
+ vector<bbox> const& srcdetail = slabsetup->srcdetail;
+ vector<bbox> const& dstdetail = slabsetup->dstdetail;
+
+ vector<int> srcelems (size);
+ vector<int> srccount (size);
+ vector<int> srcoffset (size + 1);
+
vector<int> dstelems (size);
vector<int> dstcount (size);
vector<int> dstoffset (size + 1);
- CCTK_TimerStopI (timer_init);
-
int nvartypes = 0;
@@ -1321,79 +1601,44 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
check (dst2count[n] == srccount[n]);
}
}
-
-
-
+
+
+
CCTK_TimerStartI (timer_copy_in);
for (int n = 0; n < size; ++n) {
check (SLAB_MAXDIM == 3);
- if (info[0].xpose==0 and info[1].xpose==1 and info[2].xpose==2 and
- srcdetail[n*SLAB_MAXDIM ].str==1 and
+ if (srcdetail[n*SLAB_MAXDIM ].str==1 and
srcdetail[n*SLAB_MAXDIM+1].str==1 and
srcdetail[n*SLAB_MAXDIM+2].str==1 and
vartype == CCTK_VARIABLE_REAL)
{
- // Optimised version for a special case: no transposing
-
- copy_data<CCTK_REAL> (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
- n, varis, nvaris);
-
- } else if (info[0].xpose==1 and info[1].xpose==0 and info[2].xpose==2 and
- srcdetail[n*SLAB_MAXDIM ].str==1 and
- srcdetail[n*SLAB_MAXDIM+1].str==1 and
- srcdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_REAL)
- {
- // Optimised version for a special case: transpose x and y
-
- copy_data<CCTK_REAL> (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
- n, varis, nvaris, 1, 0, 2);
-
+ // Optimised for stride 1 and CCTK_REAL
+ copy_data<CCTK_REAL>
+ (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
+ n, varis, nvaris, info[0].xpose, info[1].xpose, info[2].xpose);
} else if (srcdetail[n*SLAB_MAXDIM ].str==1 and
srcdetail[n*SLAB_MAXDIM+1].str==1 and
srcdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_REAL)
- {
- // Optimised version for CCTK_REAL and stride 1
-
- copy_data<CCTK_REAL> (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
- n, varis, nvaris, info[0].xpose, info[1].xpose, info[2].xpose);
-
- } else if (info[0].xpose==0 and info[1].xpose==1 and info[2].xpose==2 and
- srcdetail[n*SLAB_MAXDIM ].str==1 and
- srcdetail[n*SLAB_MAXDIM+1].str==1 and
- srcdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_INT)
+ vartype == CCTK_VARIABLE_COMPLEX)
{
- // Optimised version for a special case: no transposing
-
- copy_data<CCTK_INT> (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
- n, varis, nvaris);
-
- } else if (info[0].xpose==1 and info[1].xpose==0 and info[2].xpose==2 and
- srcdetail[n*SLAB_MAXDIM ].str==1 and
+ // Optimised for stride 1 and CCTK_COMPLEX
+ copy_data<CCTK_COMPLEX>
+ (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
+ n, varis, nvaris, info[0].xpose, info[1].xpose, info[2].xpose);
+ } else if (srcdetail[n*SLAB_MAXDIM ].str==1 and
srcdetail[n*SLAB_MAXDIM+1].str==1 and
srcdetail[n*SLAB_MAXDIM+2].str==1 and
vartype == CCTK_VARIABLE_INT)
{
- // Optimised version for a special case: transpose x and y
-
- copy_data<CCTK_INT> (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
- n, varis, nvaris, 1, 0, 2);
-
- } else if (srcdetail[n*SLAB_MAXDIM ].str==1 and
- srcdetail[n*SLAB_MAXDIM+1].str==1 and
- srcdetail[n*SLAB_MAXDIM+2].str==1)
- {
- // Optimised version for CCTK_INT and stride 1
-
- copy_data<CCTK_INT> (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
- n, varis, nvaris, info[0].xpose, info[1].xpose, info[2].xpose);
-
+ // Optimised for stride 1 and CCTK_INT
+ copy_data<CCTK_INT>
+ (info, srcdetail, srcoffset, srcelems, srcdata, srcptrs,
+ n, varis, nvaris, info[0].xpose, info[1].xpose, info[2].xpose);
} else {
// Generic, unoptimised version
+ CCTK_TimerStartI (timer_copy_in_general);
int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+info[0].xpose].len;
int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+info[1].xpose].len;
@@ -1443,6 +1688,7 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
}
} // for vari
+ CCTK_TimerStopI (timer_copy_in_general);
}
} // for n
@@ -1470,25 +1716,26 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
&dstdata.front(), &dstcount.front(), &dstoffset.front(), vardatatype,
comm);
} else {
- vector<MPI_Request> requests (2 * size);
+ vector<MPI_Request> requests;
+ requests.reserve (2 * size);
// Start receive
for (int n = 0; n < size; ++n) {
if (n != rank and dstcount[n] > 0) {
+ MPI_Request req;
MPI_Irecv
(&dstdata[vartypesize * dstoffset[n]], dstcount[n], vardatatype,
- n, 0, comm, &requests[n]);
- } else {
- requests[n] = MPI_REQUEST_NULL;
+ n, 0, comm, &req);
+ requests.push_back (req);
}
}
// Start send
for (int n = 0; n < size; ++n) {
if (n != rank and srccount[n] > 0) {
+ MPI_Request req;
MPI_Isend
(&srcdata[vartypesize * srcoffset[n]], srccount[n], vardatatype,
- n, 0, comm, &requests[size + n]);
- } else {
- requests[size + n] = MPI_REQUEST_NULL;
+ n, 0, comm, &req);
+ requests.push_back (req);
}
}
// Self communication
@@ -1500,7 +1747,7 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
dstcount[n] * vartypesize);
}
// Wait
- MPI_Waitall (2 * size, &requests.front(), MPI_STATUSES_IGNORE);
+ if (not requests.empty()) MPI_Waitall (requests.size(), &requests.front(), MPI_STATUSES_IGNORE); // front() is undefined on an empty vector; nothing to wait for in that case
}
ifcheck {
@@ -1524,116 +1771,36 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
for (int n = 0; n < size; ++n) {
check (SLAB_MAXDIM == 3);
- if (info[0].flip==0 and info[1].flip==0 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
+ if (dstdetail[n*SLAB_MAXDIM ].str==1 and
dstdetail[n*SLAB_MAXDIM+1].str==1 and
dstdetail[n*SLAB_MAXDIM+2].str==1 and
vartype == CCTK_VARIABLE_REAL)
{
- // Optimised version for a special case: no flipping
-
- copy_data_back<CCTK_REAL> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris);
-
- } else if (info[0].flip==1 and info[1].flip==0 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
- dstdetail[n*SLAB_MAXDIM+1].str==1 and
- dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_REAL)
- {
- // Optimised version for a special case: flip in x direction
-
- copy_data_back<CCTK_REAL> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, true);
-
- } else if (info[0].flip==0 and info[1].flip==1 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
- dstdetail[n*SLAB_MAXDIM+1].str==1 and
- dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_REAL)
- {
- // Optimised version for a special case: flip in y direction
-
- copy_data_back<CCTK_REAL> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, false, true);
-
- } else if (info[0].flip==1 and info[1].flip==1 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
- dstdetail[n*SLAB_MAXDIM+1].str==1 and
- dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_REAL)
- {
- // Optimised version for a special case: flip in x and y direction
-
- copy_data_back<CCTK_REAL> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, true, true);
-
+ // Optimised version for stride 1 and CCTK_REAL
+ copy_data_back<CCTK_REAL>
+ (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
+ n, varis, nvaris, info[0].flip, info[1].flip, info[2].flip);
} else if (dstdetail[n*SLAB_MAXDIM ].str==1 and
dstdetail[n*SLAB_MAXDIM+1].str==1 and
dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_REAL)
- {
- // Optimised version for CCTK_REAL and stride 1
-
- copy_data_back<CCTK_REAL> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, info[0].flip==1, info[1].flip==1, info[2].flip==1);
-
- } else if (info[0].flip==0 and info[1].flip==0 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
- dstdetail[n*SLAB_MAXDIM+1].str==1 and
- dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_INT)
- {
- // Optimised version for a special case: no flipping
-
- copy_data_back<CCTK_INT> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris);
-
- } else if (info[0].flip==1 and info[1].flip==0 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
- dstdetail[n*SLAB_MAXDIM+1].str==1 and
- dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_INT)
- {
- // Optimised version for a special case: flip in x direction
-
- copy_data_back<CCTK_INT> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, true);
-
- } else if (info[0].flip==0 and info[1].flip==1 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
- dstdetail[n*SLAB_MAXDIM+1].str==1 and
- dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_INT)
- {
- // Optimised version for a special case: flip in y direction
-
- copy_data_back<CCTK_INT> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, false, true);
-
- } else if (info[0].flip==1 and info[1].flip==1 and info[2].flip==0 and
- dstdetail[n*SLAB_MAXDIM ].str==1 and
- dstdetail[n*SLAB_MAXDIM+1].str==1 and
- dstdetail[n*SLAB_MAXDIM+2].str==1 and
- vartype == CCTK_VARIABLE_INT)
+ vartype == CCTK_VARIABLE_COMPLEX)
{
- // Optimised version for a special case: flip in x and y direction
-
- copy_data_back<CCTK_INT> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, true, true);
-
+ // Optimised version for stride 1 and CCTK_COMPLEX
+ copy_data_back<CCTK_COMPLEX>
+ (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
+ n, varis, nvaris, info[0].flip, info[1].flip, info[2].flip);
} else if (dstdetail[n*SLAB_MAXDIM ].str==1 and
dstdetail[n*SLAB_MAXDIM+1].str==1 and
dstdetail[n*SLAB_MAXDIM+2].str==1 and
vartype == CCTK_VARIABLE_INT)
{
- // Optimised version for CCTK_INT and stride 1
-
- copy_data_back<CCTK_INT> (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
- n, varis, nvaris, info[0].flip==1, info[1].flip==1, info[2].flip==1);
-
+ // Optimised version for stride 1 and CCTK_INT
+ copy_data_back<CCTK_INT>
+ (info, dstdetail, dstoffset, dstelems, dstdata, dstptrs,
+ n, varis, nvaris, info[0].flip, info[1].flip, info[2].flip);
} else {
// Generic, unoptimised version
+ CCTK_TimerStartI (timer_copy_back_general);
int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len;
int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len;
@@ -1684,6 +1851,7 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
}
} // for vari
+ CCTK_TimerStopI (timer_copy_back_general);
}
@@ -1699,11 +1867,79 @@ Slab_MultiTransfer (cGH const * restrict const cctkGH,
MPI_Barrier (comm);
}
+ CCTK_TimerStopI (timer_apply);
+
+ return 0;
+}
+
+
+// Dispose of a slab setup object: the pointer is passed to delete, so it must not be used after this call
+extern "C"
+int
+Slab_MultiTransfer_Finalize
+(cGH const * restrict const cctkGH,
+ slabsetup * restrict const slabsetup)
+{
+ DECLARE_CCTK_PARAMETERS;
+
+ // Check arguments (NOTE(review): check() is defined outside this excerpt; presumably it aborts on a null pointer -- confirm)
+ check (cctkGH);
+ check (slabsetup);
+
+ delete slabsetup; // Slab_MultiTransfer passes the pointer it obtained from Slab_MultiTransfer_Init here
+
+ return 0; // the only return statement in this function: the result is always 0
+}
+
+
+
+// Interface for transferring nvars variables in one go: runs Init, Apply and Finalize back to back
+extern "C"
+int
+Slab_MultiTransfer (cGH const * restrict const cctkGH,
+ int const dim,
+ xferinfo const * restrict const xferinfo,
+ int const options,
+ int const nvars,
+ int const * restrict const srctypes,
+ void const * restrict const * restrict const srcptrs,
+ int const * restrict const dsttypes,
+ void * restrict const * restrict const dstptrs)
+{
+ slabsetup * restrict const slabsetup = // setup object built from dim, xferinfo and options only
+ Slab_MultiTransfer_Init (cctkGH, dim, xferinfo, options);
+ Slab_MultiTransfer_Apply (cctkGH, slabsetup, // the variable lists are only needed by this step
+ nvars, srctypes, srcptrs, dsttypes, dstptrs);
+ Slab_MultiTransfer_Finalize (cctkGH, slabsetup); // NOTE(review): results of Apply and Finalize are discarded; 0 is returned regardless
return 0;
}
+// Old interface for transferring a single variable; forwards to Slab_MultiTransfer with nvars = 1
+extern "C"
+int
+Slab_Transfer (cGH const * restrict const cctkGH,
+ int const dim,
+ xferinfo const * restrict const xferinfo,
+ int const options,
+ int const srctype,
+ void const * restrict const srcptr,
+ int const dsttype,
+ void * restrict const dstptr)
+{
+ int const nvars = 1; // exactly one source/destination pair
+ int const srctypes[] = { srctype }; // wrap each scalar argument in a one-element array
+ void const * restrict const srcptrs[] = { srcptr };
+ int const dsttypes[] = { dsttype };
+ void * restrict const dstptrs[] = { dstptr };
+ return Slab_MultiTransfer (cctkGH, dim, xferinfo, options, // result of the multi-variable call is returned unchanged
+ nvars, srctypes, srcptrs, dsttypes, dstptrs);
+}
+
+
+
+// Fortran wrapper
extern "C"
void CCTK_FCALL
CCTK_FNAME(Slab_Transfer) (int * restrict const ierr,
@@ -1765,25 +2001,3 @@ CCTK_FNAME(Slab_Transfer) (int * restrict const ierr,
*ierr = Slab_Transfer (*cctkGH, *dim, &xferinfo.front(), *options,
*srctype, srcptr, *dsttype, dstptr);
}
-
-
-
-extern "C"
-int
-Slab_Transfer (cGH const * restrict const cctkGH,
- int const dim,
- xferinfo const * restrict const xferinfo,
- int const options,
- int const srctype,
- void const * restrict const srcptr,
- int const dsttype,
- void * restrict const dstptr)
-{
- int const nvars = 1;
- int const srctypes[] = { srctype };
- void const * restrict const srcptrs[] = { srcptr };
- int const dsttypes[] = { dsttype };
- void * restrict const dstptrs[] = { dstptr };
- return Slab_MultiTransfer (cctkGH, dim, xferinfo, options,
- nvars, srctypes, srcptrs, dsttypes, dstptrs);
-}
diff --git a/src/slab.h b/src/slab.h
index 7c02cce..e8c1855 100644
--- a/src/slab.h
+++ b/src/slab.h
@@ -1,5 +1,3 @@
-/* $Header$ */
-
#ifndef SLAB_H
#define SLAB_H
@@ -100,26 +98,54 @@ void
print_xferinfo (FILE * const out,
struct xferinfo const * CCTK_RESTRICT const xferinfo);
+
+
+struct slabsetup; /* forward declaration only; the definition is not part of this hunk */
+/* Create a setup object from dim, xferinfo and options; slab.cc later hands it to _Apply and _Finalize */
+struct slabsetup *
+Slab_MultiTransfer_Init
+(cGH const * CCTK_RESTRICT const cctkGH,
+ int const dim,
+ struct xferinfo const * CCTK_RESTRICT const xferinfo,
+ int const options);
+/* Transfer nvars variables using an existing setup object, which is taken as const */
+int
+Slab_MultiTransfer_Apply
+(cGH const * CCTK_RESTRICT const cctkGH,
+ struct slabsetup const * CCTK_RESTRICT const slabsetup,
+ int const nvars,
+ int const * CCTK_RESTRICT const srctypes,
+ void const * CCTK_RESTRICT const * CCTK_RESTRICT const srcptrs,
+ int const * CCTK_RESTRICT const dsttypes,
+ void * CCTK_RESTRICT const * CCTK_RESTRICT const dstptrs);
+/* Delete the setup object (the slab.cc definition calls delete on it); do not use it afterwards */
+int
+Slab_MultiTransfer_Finalize
+(cGH const * CCTK_RESTRICT const cctkGH,
+ struct slabsetup * CCTK_RESTRICT const slabsetup);
+
int
-Slab_Transfer (cGH const * CCTK_RESTRICT const cctkGH,
- int const dim,
- struct xferinfo const * CCTK_RESTRICT const xferinfo,
- int const options,
- int const srctype,
- void const * CCTK_RESTRICT const srcptr,
- int const dsttype,
- void * CCTK_RESTRICT const dstptr);
+Slab_MultiTransfer /* one-shot variant: slab.cc implements it as Init + Apply + Finalize */
+(cGH const * CCTK_RESTRICT const cctkGH,
+ int const dim,
+ struct xferinfo const * CCTK_RESTRICT const xferinfo,
+ int const options,
+ int const nvars,
+ int const * CCTK_RESTRICT const srctypes,
+ void const * CCTK_RESTRICT const * CCTK_RESTRICT const srcptrs,
+ int const * CCTK_RESTRICT const dsttypes,
+ void * CCTK_RESTRICT const * CCTK_RESTRICT const dstptrs);
int
-Slab_MultiTransfer (cGH const * CCTK_RESTRICT const cctkGH,
- int const dim,
- struct xferinfo const * CCTK_RESTRICT const xferinfo,
- int const options,
- int const nvars,
- int const * CCTK_RESTRICT const srctypes,
- void const * CCTK_RESTRICT const * CCTK_RESTRICT const srcptrs,
- int const * CCTK_RESTRICT const dsttypes,
- void * CCTK_RESTRICT const * CCTK_RESTRICT const dstptrs);
+Slab_Transfer /* single-variable interface: slab.cc forwards it to Slab_MultiTransfer with nvars = 1 */
+(cGH const * CCTK_RESTRICT const cctkGH,
+ int const dim,
+ struct xferinfo const * CCTK_RESTRICT const xferinfo,
+ int const options,
+ int const srctype,
+ void const * CCTK_RESTRICT const srcptr,
+ int const dsttype,
+ void * CCTK_RESTRICT const dstptr);
#ifdef __cplusplus
}
diff --git a/src/slab.inc b/src/slab.inc
index 395985a..791cfd8 100644
--- a/src/slab.inc
+++ b/src/slab.inc
@@ -1,5 +1,4 @@
! -*-F90-*-
-! $Header$
interface
subroutine Slab_Transfer (ierr, cctkGH, dim, &