From c09c77a5a8ad5c3bea9ff1b0c8aff9987d1a565b Mon Sep 17 00:00:00 2001 From: schnetter Date: Sun, 25 Oct 2009 23:34:31 +0000 Subject: Update thorn TAT/Slab: - handle different variable types in Slab_MultiTransfer Also: - rewrite in C++ - clean up code git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@61 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8 --- src/slab.cc | 1886 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1886 insertions(+) create mode 100644 src/slab.cc diff --git a/src/slab.cc b/src/slab.cc new file mode 100644 index 0000000..0ac4dc5 --- /dev/null +++ b/src/slab.cc @@ -0,0 +1,1886 @@ +// TODO: +// Provide facilities for dim > 3 +// Set up the slab exchange information in advance +// Slab in several stages +// Test slabbing without MPI +// Allow not setting the ghost zones +// Allow not using / not setting the boundaries +// Allow different numbers of ghost zones at the lower and upper boundary + + + +// Print debug information? +#undef DEBUG + +// Perform expensive self-checks? +#define CHECK + +// Omit all self-checks? (Overrides CHECK) +#undef NDEBUG + +// Byte value for poison checks: use 255 for nan, or e.g. 113 for a +// large value +#define POISON_VALUE 254 + + + +#include +#include +#include +#include +#include + +#include "cctk.h" +#include "cctk_Arguments.h" +#include "cctk_Parameters.h" +#include "cctk_DefineThorn.h" +#include "util_ErrorCodes.h" +#include "util_Table.h" + +#ifdef CCTK_MPI +# include +# define HAVE_MPI 1 +#else +# define HAVE_MPI 0 +#endif + +#include "slab.h" + +#ifdef CCTK_CXX_RESTRICT +# undef restrict +# define restrict CCTK_CXX_RESTRICT +#endif + +using namespace std; + + + +#ifdef DEBUG +# define ifdebug +#else +# define ifdebug while (0) +#endif + +#ifdef CHECK +# define ifcheck +#else +# define ifcheck while (0) +#endif + +#ifdef NDEBUG +# define check(x) ((x) ? 0 : CCTK_WARN (0, "internal error")) +#else +# define check(x) assert (x) +#endif + + + +static int timer_init = -1; +static int timer_copy_in = -1; +static int timer_xfer = -1; +static int timer_copy_back = -1; + +extern "C" +void +Slab_InitTimers (CCTK_ARGUMENTS) +{ + DECLARE_CCTK_ARGUMENTS; + timer_init = CCTK_TimerCreate ("Slab/init"); + timer_copy_in = CCTK_TimerCreate ("Slab/copy in"); + timer_xfer = CCTK_TimerCreate ("Slab/xfer"); + timer_copy_back = CCTK_TimerCreate ("Slab/copy back"); +} + +extern "C" +int +Slab_PrintTimers () +{ + CCTK_TimerPrintDataI (timer_init , -1); + CCTK_TimerPrintDataI (timer_copy_in , -1); + CCTK_TimerPrintDataI (timer_xfer , -1); + CCTK_TimerPrintDataI (timer_copy_back, -1); + return 0; +} + + + +// Find out which driver to use +#ifdef CCTK_MPI +# if defined CARPET_CARPET +# include "Carpet/Carpet/src/carpet_public.h" +# endif +# if defined CACTUSPUGH_PUGH +# include "CactusPUGH/PUGH/src/include/pugh.h" +# endif +#endif + + + +#ifdef CCTK_MPI +// Determine MPI type sizes + +# define CACTUS_MPI_BYTE MPI_CHAR + +# define CACTUS_MPI_INT1 MPI_CHAR + +# if SIZEOF_SHORT_INT == 2 +# define CACTUS_MPI_INT2 MPI_SHORT +# elif SIZEOF_INT == 2 +# define CACTUS_MPI_INT2 MPI_INT +# elif SIZEOF_LONG_INT == 2 +# define CACTUS_MPI_INT2 MPI_LONG +# elif SIZEOF_LONG_LONG == 2 +# define CACTUS_MPI_INT2 MPI_LONG_LONG_INT +# endif + +# if SIZEOF_SHORT_INT == 4 +# define CACTUS_MPI_INT4 MPI_SHORT +# elif SIZEOF_INT == 4 +# define CACTUS_MPI_INT4 MPI_INT +# elif SIZEOF_LONG_INT == 4 +# define CACTUS_MPI_INT4 MPI_LONG +# elif SIZEOF_LONG_LONG == 4 +# define CACTUS_MPI_INT4 MPI_LONG_LONG_INT +# endif + +# if SIZEOF_SHORT_INT == 8 +# define CACTUS_MPI_INT8 MPI_SHORT +# elif SIZEOF_INT == 8 +# define CACTUS_MPI_INT8 MPI_INT +# elif SIZEOF_LONG_INT == 8 +# define CACTUS_MPI_INT8 MPI_LONG +# elif SIZEOF_LONG_LONG == 8 +# define CACTUS_MPI_INT8 MPI_LONG_LONG_INT +# endif + +# if SIZEOF_FLOAT == 4 +# define CACTUS_MPI_REAL4 MPI_FLOAT +# elif SIZEOF_DOUBLE == 4 +# define CACTUS_MPI_REAL4 MPI_DOUBLE +# elif SIZEOF_LONG_DOUBLE == 4 +# define CACTUS_MPI_REAL4 MPI_LONG_DOUBLE +# endif + +# if SIZEOF_FLOAT == 8 +# define CACTUS_MPI_REAL8 MPI_FLOAT +# elif SIZEOF_DOUBLE == 8 +# define CACTUS_MPI_REAL8 MPI_DOUBLE +# elif SIZEOF_LONG_DOUBLE == 8 +# define CACTUS_MPI_REAL8 MPI_LONG_DOUBLE +# endif + +# if SIZEOF_FLOAT == 16 +# define CACTUS_MPI_REAL16 MPI_FLOAT +# elif SIZEOF_DOUBLE == 16 +# define CACTUS_MPI_REAL16 MPI_DOUBLE +# elif SIZEOF_LONG_DOUBLE == 16 +# define CACTUS_MPI_REAL16 MPI_LONG_DOUBLE +# endif + +static MPI_Datatype CACTUS_MPI_COMPLEX8; +static MPI_Datatype CACTUS_MPI_COMPLEX16; +static MPI_Datatype CACTUS_MPI_COMPLEX32; + +#endif + + + +// Replace MPI functions if MPI is disabled +#ifndef CCTK_MPI + +typedef int MPI_Comm; + +typedef enum { + CACTUS_MPI_BYTE = CCTK_VARIABLE_BYTE, + CACTUS_MPI_INT = CCTK_VARIABLE_INT, + CACTUS_MPI_INT1 = CCTK_VARIABLE_INT1, + CACTUS_MPI_INT2 = CCTK_VARIABLE_INT2, + CACTUS_MPI_INT4 = CCTK_VARIABLE_INT4, + CACTUS_MPI_INT8 = CCTK_VARIABLE_INT8, + CACTUS_MPI_REAL = CCTK_VARIABLE_REAL, + CACTUS_MPI_REAL4 = CCTK_VARIABLE_REAL4, + CACTUS_MPI_REAL8 = CCTK_VARIABLE_REAL8, + CACTUS_MPI_REAL16 = CCTK_VARIABLE_REAL16, + CACTUS_MPI_COMPLEX = CCTK_VARIABLE_COMPLEX, + CACTUS_MPI_COMPLEX8 = CCTK_VARIABLE_COMPLEX8, + CACTUS_MPI_COMPLEX16 = CCTK_VARIABLE_COMPLEX16, + CACTUS_MPI_COMPLEX32 = CCTK_VARIABLE_COMPLEX32 +} MPI_Datatype; + +static MPI_Datatype MPI_INT; + +typedef enum { MPI_MIN, MPI_MAX } MPI_Op; + +static int +MPI_Barrier (MPI_Comm comm) +{ + return 0; +} + +static int +MPI_Comm_size (MPI_Comm comm, int * size) +{ + *size = 1; + return 0; +} + +static int +MPI_Comm_rank (MPI_Comm comm, int * rank) +{ + *rank = 0; + return 0; +} + +static int +MPI_Allgather (void * sendbuf, int sendcnt, int sendtype, + void * recvbuf, int recvcnt, int recvtype, + MPI_Comm comm) +{ + int recvsize; + assert (sendbuf); + assert (recvbuf); + assert (sendcnt == recvcnt); + assert (recvcnt >= 0); + assert (sendtype == recvtype); + recvsize = CCTK_VarTypeSize (recvtype); + assert (recvsize > 0); + memcpy (recvbuf, sendbuf, recvcnt * recvsize); + return 0; +} + +static int +MPI_Alltoall (void * sendbuf, int sendcnt, int sendtype, + void * recvbuf, int recvcnt, int recvtype, + MPI_Comm comm) +{ + int recvsize; + assert (sendbuf); + assert (recvbuf); + assert (sendcnt == recvcnt); + assert (recvcnt >= 0); + assert (sendtype == recvtype); + recvsize = CCTK_VarTypeSize (recvtype); + assert (recvsize > 0); + memcpy (recvbuf, sendbuf, recvcnt * recvsize); + return 0; +} + +static int +MPI_Alltoallv (void * sendbuf, int * sendcnt, int * sendoff, int sendtype, + void * recvbuf, int * recvcnt, int * recvoff, int recvtype, + MPI_Comm comm) +{ + int recvsize; + assert (sendbuf); + assert (recvbuf); + assert (sendcnt); + assert (recvcnt); + assert (*sendcnt == *recvcnt); + assert (*recvcnt >= 0); + assert (sendoff); + assert (recvoff); + assert (*sendoff == 0); + assert (*recvoff == 0); + assert (sendtype == recvtype); + recvsize = CCTK_VarTypeSize (recvtype); + assert (recvsize > 0); + memcpy (recvbuf, sendbuf, *recvcnt * recvsize); + return 0; +} + +static int +MPI_Allreduce (void * sendbuf, void * recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +{ + int recvsize; + assert (sendbuf); + assert (recvbuf); + assert (count >= 0); + recvsize = CCTK_VarTypeSize (datatype); + assert (recvsize > 0); + memcpy (recvbuf, sendbuf, count * recvsize); + return 0; +} + +typedef int MPI_Request; // dummy +typedef int MPI_Status; // dummy +#define MPI_REQUEST_NULL 0 // dummy +#define MPI_STATUSES_IGNORE 0 // dummy + +static int +MPI_Irecv (void *buf, int count, MPI_Datatype datatype, + int source, int tag, MPI_Comm comm, MPI_Request *request) +{ + abort(); +} + +static int +MPI_Isend (void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Request *request) +{ + abort(); +} + +static int +MPI_Waitall (int count, MPI_Request *array_of_requests, + MPI_Status *array_of_statuses) +{ + abort(); +} + +#endif + + + +// Get the MPI COMM_WOLRD communicator from the driver +static MPI_Comm +get_mpi_comm (cGH const * restrict const cctkGH) +{ +#ifdef CCTK_MPI + if (CCTK_IsFunctionAliased ("GetMPICommWorld")) { + return * (MPI_Comm const *) GetMPICommWorld (cctkGH); + } +# if defined CACTUSPUGH_PUGH + { + static int PUGH_active = -1; + if (PUGH_active == -1) PUGH_active = CCTK_IsThornActive ("PUGH"); + assert (PUGH_active >= 0); + if (PUGH_active) return PUGH_pGH(cctkGH)->PUGH_COMM_WORLD; + } +# endif + return MPI_COMM_WORLD; +#else + return 0; +#endif +} + + + +// Initialise the MPI datatypes for complex variables +extern "C" +int +Slab_InitMPIDatatypes () +{ +#ifdef CCTK_MPI +# ifdef HAVE_CCTK_REAL4 + MPI_Type_contiguous (2, CACTUS_MPI_REAL4, &CACTUS_MPI_COMPLEX8); + MPI_Type_commit (&CACTUS_MPI_COMPLEX8); +# endif +# ifdef HAVE_CCTK_REAL8 + MPI_Type_contiguous (2, CACTUS_MPI_REAL8, &CACTUS_MPI_COMPLEX16); + MPI_Type_commit (&CACTUS_MPI_COMPLEX16); +# endif +# ifdef HAVE_CCTK_REAL16 + MPI_Type_contiguous (2, CACTUS_MPI_REAL16, &CACTUS_MPI_COMPLEX32); + MPI_Type_commit (&CACTUS_MPI_COMPLEX32); +# endif +#endif + +#ifndef CCTK_MPI + switch (sizeof(int)) { +#ifdef HAVE_CCTK_INT1 + case sizeof(CCTK_INT1): MPI_INT = CCTK_VARIABLE_INT1; break; +#endif +#ifdef HAVE_CCTK_INT2 + case sizeof(CCTK_INT2): MPI_INT = CCTK_VARIABLE_INT2; break; +#endif +#ifdef HAVE_CCTK_INT4 + case sizeof(CCTK_INT4): MPI_INT = CCTK_VARIABLE_INT4; break; +#endif +#ifdef HAVE_CCTK_INT8 + case sizeof(CCTK_INT8): MPI_INT = CCTK_VARIABLE_INT8; break; +#endif + default: assert(0); + } +#endif + + return 0; +} + + + +// Normalise a Cactus datatype +static int +normal_type (int cactustype) +{ + switch (cactustype) { + case CCTK_VARIABLE_INT: +#ifdef CCTK_INTEGER_PRECISION_1 + return CCTK_VARIABLE_INT1; +#endif +#ifdef CCTK_INTEGER_PRECISION_2 + return CCTK_VARIABLE_INT2; +#endif +#ifdef CCTK_INTEGER_PRECISION_4 + return CCTK_VARIABLE_INT4; +#endif +#ifdef CCTK_INTEGER_PRECISION_8 + return CCTK_VARIABLE_INT8; +#endif + assert (0); + case CCTK_VARIABLE_REAL: +#ifdef CCTK_REAL_PRECISION_4 + return CCTK_VARIABLE_REAL4; +#endif +#ifdef CCTK_REAL_PRECISION_8 + return CCTK_VARIABLE_REAL8; +#endif +#ifdef CCTK_REAL_PRECISION_16 + return CCTK_VARIABLE_REAL16; +#endif + assert (0); + case CCTK_VARIABLE_COMPLEX: +#ifdef CCTK_REAL_PRECISION_4 + return CCTK_VARIABLE_COMPLEX8; +#endif +#ifdef CCTK_REAL_PRECISION_8 + return CCTK_VARIABLE_COMPLEX16; +#endif +#ifdef CCTK_REAL_PRECISION_16 + return CCTK_VARIABLE_COMPLEX32; +#endif + assert (0); + } + return cactustype; +} + + + +// Find the MPI datatype corresponding to a Cactus datatype +static MPI_Datatype +mpi_type (int const cactustype) +{ + int const normaltype = normal_type (cactustype); + switch (normaltype) { + case CCTK_VARIABLE_BYTE: return CACTUS_MPI_BYTE; +#ifdef HAVE_CCTK_INT1 + case CCTK_VARIABLE_INT1: return CACTUS_MPI_INT1; +#endif +#ifdef HAVE_CCTK_INT2 + case CCTK_VARIABLE_INT2: return CACTUS_MPI_INT2; +#endif +#ifdef HAVE_CCTK_INT4 + case CCTK_VARIABLE_INT4: return CACTUS_MPI_INT4; +#endif +#ifdef HAVE_CCTK_INT8 + case CCTK_VARIABLE_INT8: return CACTUS_MPI_INT8; +#endif +#ifdef HAVE_CCTK_REAL4 + case CCTK_VARIABLE_REAL4: return CACTUS_MPI_REAL4; + case CCTK_VARIABLE_COMPLEX8: return CACTUS_MPI_COMPLEX8; +#endif +#ifdef HAVE_CCTK_REAL8 + case CCTK_VARIABLE_REAL8: return CACTUS_MPI_REAL8; + case CCTK_VARIABLE_COMPLEX16: return CACTUS_MPI_COMPLEX16; +#endif +#ifdef HAVE_CCTK_REAL16 + case CCTK_VARIABLE_REAL16: return CACTUS_MPI_REAL16; + case CCTK_VARIABLE_COMPLEX32: return CACTUS_MPI_COMPLEX32; +#endif + } + assert (0); + CCTK_WARN (0, "internal error"); + return CACTUS_MPI_BYTE; // not reached +} + + + +//////////////////////////////////////////////////////////////////////////////// + + + +struct bbox { + int off; // offset + int len; // length + int str; // stride +}; + +struct arrays { + bbox global; // global region (all processors) + bbox local; // this processor's region + bbox active; // ??? + bbox slab; // the slab that should be transferred +}; + +struct xfer { + arrays src; // source region + arrays dst; // destination region + int xpose; // exchange axes + int flip; // change directions of axes +}; + + + +static inline int +roundup (int const x, int const y) +{ + assert (x >= 0); + assert (y > 0); + return (x + y - 1) / y * y; +} + + + +static void +bbox_print (bbox const * restrict const bbox) +{ + assert (bbox); + printf + ("[%d:%d:%d]", + bbox->off, bbox->off + (bbox->len - 1) * bbox->str, bbox->str); +} + +static void +bbox_check (bbox const * restrict const bbox) +{ + assert (bbox); + assert (bbox->len >= 0); + assert (bbox->str > 0); +} + +static void +global2bbox (slabinfo const * restrict const slab, + bbox * restrict const bbox) +{ + assert (slab); + assert (bbox); + assert (slab->gsh >= 0); + bbox->off = 0; + bbox->len = slab->gsh; + bbox->str = 1; + bbox_check (bbox); +} + +static void +local2bbox (slabinfo const * restrict const slab, + bbox * restrict const bbox) +{ + assert (slab); + assert (bbox); + assert (slab->lbnd >= 0); + assert (slab->lsh >= 0); + assert (slab->lbnd + slab->lsh <= slab->gsh); + bbox->off = slab->lbnd; + bbox->len = slab->lsh; + bbox->str = 1; + bbox_check (bbox); +} + +static void +active2bbox (slabinfo const * restrict const slab, + bbox * restrict const bbox, + int const useghosts) +{ + assert (slab); + assert (bbox); + assert (useghosts == 0 or useghosts == 1); + assert (slab->lbnd >= 0); + assert (slab->lsh >= 0); + assert (slab->lbnd + slab->lsh <= slab->gsh); + assert (slab->lbbox == 0 or slab->lbbox == 1); + assert (slab->ubbox == 0 or slab->ubbox == 1); + assert (slab->nghostzones >= 0); + int const nlghostzones = slab->lbbox or useghosts ? 0 : slab->nghostzones; + int const nughostzones = slab->ubbox or useghosts ? 0 : slab->nghostzones; + bbox->off = slab->lbnd + nlghostzones; + bbox->len = slab->lsh - nlghostzones - nughostzones; + bbox->str = 1; + bbox_check (bbox); +} + +static void +slab2bbox (slabinfo const * restrict const slab, + bbox * restrict const bbox) +{ + assert (slab); + assert (bbox); + bbox->off = slab->off; + bbox->len = slab->len; + bbox->str = slab->str; + bbox_check (bbox); +} + +static int +bbox_iscontained (bbox const * restrict const inner, + bbox const * restrict const outer) +{ + bbox_check (inner); + bbox_check (outer); + int const inner_last = inner->off + (inner->len - 1) * inner->str; + int const outer_last = outer->off + (outer->len - 1) * outer->str; + return inner->off >= outer->off and inner_last <= outer_last; +} + +static void +bbox_clip (bbox * restrict const inner, + bbox const * restrict const outer) +{ + bbox_check (inner); + bbox_check (outer); + int inner_last = inner->off + (inner->len - 1) * inner->str; + int const outer_last = outer->off + (outer->len - 1) * outer->str; + if (inner->off < outer->off) { + inner->off += roundup (outer->off - inner->off, inner->str); + } + if (inner_last > outer_last) { + inner_last -= roundup (inner_last - outer_last, inner->str); + } + assert ((inner_last - inner->off) % inner->str == 0); + if (inner_last >= inner->off) { + inner->len = (inner_last - inner->off + inner->str) / inner->str; + } else { + inner->len = 0; + } + bbox_check (inner); +} + +// ydst = xdst + Flip (ysrc - xsrc)] +// This function is its own inverse. +static void +bbox_xform (bbox * restrict const ydst, + bbox const * restrict const ysrc, + bbox const * restrict const xdst, + bbox const * restrict const xsrc, + int const flip) +{ + assert (ydst); + bbox_check (ysrc); + bbox_check (xdst); + bbox_check (xsrc); + assert (ysrc->str == xsrc->str); + /* int const xsrc_last = xsrc->off + (xsrc->len - 1) * xsrc->str; */ + int const xdst_last = xdst->off + (xdst->len - 1) * xdst->str; + int const ysrc_last = ysrc->off + (ysrc->len - 1) * ysrc->str; + ydst->str = xdst->str; + assert ((ysrc->off - xsrc->off) % ysrc->str == 0); + ydst->off = xdst->off + (ysrc->off - xsrc->off) / ysrc->str * ydst->str; + int ydst_last = + xdst->off + (ysrc_last - xsrc->off) / ysrc->str * ydst->str; + if (flip) { + int const off = ydst->off; + int const last = ydst_last; + ydst->off = xdst->off + xdst_last - last; + ydst_last = xdst_last - (off - xdst->off); + } + assert ((ysrc_last - xsrc->off) % ysrc->str == 0); + assert (ydst_last - ydst->off + ydst->str >= 0); + ydst->len = (ydst_last - ydst->off + ydst->str) / ydst->str; + bbox_check (ydst); +} + + + +extern "C" +void +print_slabinfo (FILE * const out, + slabinfo const * restrict const slabinfo) +{ + fprintf (out, " gsh: %d\n", slabinfo->gsh); + fprintf (out, " lbnd: %d, lsh: %d\n", slabinfo->lbnd, slabinfo->lsh); + fprintf (out, " lbbox: %d, ubbox: %d, nghostzones: %d\n", + slabinfo->lbbox, slabinfo->ubbox, slabinfo->nghostzones); + fprintf (out, " off: %d, str: %d, len: %d\n", + slabinfo->off, slabinfo->str, slabinfo->len); +} + +extern "C" +void +print_xferinfo (FILE * const out, + xferinfo const * restrict const xferinfo) +{ + fprintf (out, " src:\n"); + print_slabinfo (out, & xferinfo->src); + fprintf (out, " dst:\n"); + print_slabinfo (out, & xferinfo->dst); + fprintf (out, " xpose: %d\n", xferinfo->xpose); + fprintf (out, " flip: %d\n", xferinfo->flip); +} + + + +extern "C" +int +Slab_MultiTransfer (cGH const * restrict const cctkGH, + int const dim, + xferinfo const * restrict const xferinfo, + int const options, + int const nvars, + int const * restrict const srctypes, + void const * restrict const * restrict const srcptrs, + int const * restrict const dsttypes, + void * restrict const * restrict const dstptrs) +{ + DECLARE_CCTK_PARAMETERS; + + // Check arguments + check (cctkGH); + check (dim >= 0); + check (xferinfo); + check (nvars >= 0); + check (nvars==0 or srctypes); + for (int var=0; var= 0); + check (nvars==0 or srcptrs); + // for (int var=0; var= 0); + check (nvars==0 or dstptrs); + // for (int var=0; var info (SLAB_MAXDIM); + for (int d=0; d= 0 and info[d].xpose < dim); + info[d].flip = xferinfo[d].flip; + check (info[d].flip == 0 or info[d].flip == 1); + } + for (int d=dim; d= 0 and info[d].xpose < SLAB_MAXDIM); + info[d].flip = 0; + check (info[d].flip == 0 or info[d].flip == 1); + } + + ifcheck { + ifdebug printf ("srcinfo:\n"); + for (int d=0; d 0) { + assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len); + } + } + } + + size_t srclentot = 1; + size_t dstlentot = 1; + for (int d=0; d 0) assert (srcptrs[var]); + for (int var=0; var 0) assert (dstptrs[var]); + + + + MPI_Comm comm; + { + CCTK_POINTER_TO_CONST tmp1; + int const iret1 = Util_TableGetPointerToConst (options, &tmp1, "comm"); + if (iret1 == 1) { + // There was an entry, use it + comm = * (MPI_Comm const *) tmp1; + } else if (iret1 == UTIL_ERROR_TABLE_WRONG_DATA_TYPE) { + // Entry has wront type, fall back + CCTK_POINTER tmp2; + int const iret2 = Util_TableGetPointer (options, &tmp2, "comm"); + if (iret2 == 1) { + // There was an entry, use it + comm = * (MPI_Comm const *) tmp2; + } else { + // Something went wrong, abort + check (0); + } + } else if (iret1 == UTIL_ERROR_BAD_HANDLE or + iret1 == UTIL_ERROR_TABLE_NO_SUCH_KEY) + { + // There was no entry, use a default + comm = get_mpi_comm (cctkGH); + } else { + // Something went wrong, abort + check (0); + } + } + + ifcheck { + ifdebug fflush (stdout); + MPI_Barrier (comm); + } + + int size, rank; + MPI_Comm_size (comm, &size); + MPI_Comm_rank (comm, &rank); + + ifcheck { + static int count = 424242; + int mycount = count; + ifdebug fflush (stdout); + MPI_Bcast (&mycount, 1, MPI_INT, 0, comm); + assert (mycount == count); + ++ count; + } + + + + vector allinfo (size * SLAB_MAXDIM); + { + int const info_nints = sizeof(xfer) / sizeof(int); + ifdebug fflush (stdout); + MPI_Allgather + (&info.front(), SLAB_MAXDIM * info_nints, MPI_INT, + &allinfo.front(), SLAB_MAXDIM * info_nints, MPI_INT, comm); + } + + for (int n = 0; n < size; ++n) { + for (int d=0; d 0 and + info[d].src.slab.len > 0) + { + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off); + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len); + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str); + assert + (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str); + assert + (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str); + // 2003-03-01 eschnett: I don't know why the following should + // be necessary + assert + (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str); + } + if (allinfo[n*SLAB_MAXDIM+d].dst.slab.len > 0 and + info[d].dst.slab.len > 0) + { + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.local.str == info[d].dst.local.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str); + } + assert (allinfo[n*SLAB_MAXDIM+d].xpose == info[d].xpose); + assert (allinfo[n*SLAB_MAXDIM+d].flip == info[d].flip); + } + } + + + + vector srcdetail (size * SLAB_MAXDIM); + for (int n = 0; n < size; ++n) { + ifdebug printf ("srcdetail n=%d:\n", n); + for (int d=0; d srcelems (size); + vector srccount (size); + vector srcoffset (size + 1); + + + + vector dstdetail (size * SLAB_MAXDIM); + for (int n = 0; n < size; ++n) { + ifdebug printf ("dstdetail n=%d:\n", n); + for (int d=0; d dstelems (size); + vector dstcount (size); + vector dstoffset (size + 1); + + CCTK_TimerStopI (timer_init); + + + + int nvartypes = 0; + vector vartypes (nvars); + vector vartypecount (nvars); + for (int var=0; var=nvartypes) { + vartypes[nvartypes] = srctype; + vartypecount[nvartypes] = 0; + ++nvartypes; + } + assert (vartypei varis (nvars); + for (int var=0; var 0); + + int const vartypesize = CCTK_VarTypeSize (vartype); + check (vartypesize > 0); + MPI_Datatype const vardatatype = mpi_type (vartype); + + + + srcoffset[0] = 0; + for (int n = 0; n < size; ++n) { + srcelems[n] = 1; + for (int d=0; d srcdata (srcoffset[size] * vartypesize); + ifcheck { + if (vartype == CCTK_VARIABLE_REAL) { + CCTK_REAL * restrict const srcdataptr = (CCTK_REAL *)&srcdata.front(); + CCTK_REAL marker; + memset (&marker, POISON_VALUE, sizeof marker); + for (int i = 0; i < srcoffset[size]; ++i) { + memcpy (&srcdataptr[i], &marker, sizeof marker); + } + } + } + + + + dstoffset[0] = 0; + for (int n = 0; n < size; ++n) { + dstelems[n] = 1; + for (int d=0; d dstdata (dstoffset[size] * vartypesize); + ifcheck { + if (vartype == CCTK_VARIABLE_REAL) { + CCTK_REAL * restrict const dstdataptr = (CCTK_REAL *)&dstdata.front(); + CCTK_REAL marker; + memset (&marker, POISON_VALUE, sizeof marker); + for (int i = 0; i < dstoffset[size]; ++i) { + memcpy (&dstdataptr[i], &marker, sizeof marker); + } + } + } + + check (srccount[rank] == dstcount[rank]); + + + + ifcheck { + vector src2count (size); + vector dst2count (size); + ifdebug fflush (stdout); + MPI_Alltoall + (&srccount.front(), 1, MPI_INT, &src2count.front(), 1, MPI_INT, comm); + MPI_Alltoall + (&dstcount.front(), 1, MPI_INT, &dst2count.front(), 1, MPI_INT, comm); + for (int n = 0; n < size; ++n) { + check (src2count[n] == dstcount[n]); + check (dst2count[n] == srccount[n]); + } + } + + + + CCTK_TimerStartI (timer_copy_in); + + for (int n = 0; n < size; ++n) { + check (SLAB_MAXDIM == 3); + + if (info[0].xpose==0 and info[1].xpose==1 and info[2].xpose==2 and + srcdetail[n*SLAB_MAXDIM ].str==1 and + srcdetail[n*SLAB_MAXDIM+1].str==1 and + srcdetail[n*SLAB_MAXDIM+2].str==1 and + vartype == CCTK_VARIABLE_REAL) + { + // Optimised version for a special case: no transposing + + int const srcoffi = info[0].src.local.off; + int const srcoffj = info[1].src.local.off; + int const srcoffk = info[2].src.local.off; + + int const srcleni = info[0].src.local.len; + int const srclenj = info[1].src.local.len; + int const srclenk = info[2].src.local.len; + + int const srcdetailoffi = srcdetail[n*SLAB_MAXDIM+0].off; + int const srcdetailoffj = srcdetail[n*SLAB_MAXDIM+1].off; + int const srcdetailoffk = srcdetail[n*SLAB_MAXDIM+2].off; + + int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+0].len; + int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len; + int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len; + + if (n==0) assert (srcoffset[n]==0); + // TODO: This does not take nvaris into account + // if (n=0 and srcindi=0 and srcindj=0 and srcindk=0 and srcindi=0 and srcindj=0 and srcindk= info[c].src.local.off and + srcipos[c] < info[c].src.local.off + info[c].src.local.len); + assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off and + srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1)); + bufipos[d] = ipos[d]; + assert (bufipos[d] >= 0 and + bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); + } + size_t srcind = 0; + size_t bufind = 0; + for (int d=SLAB_MAXDIM-1; d>=0; --d) { + int const c = info[d].xpose; + srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; + bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; + } + assert (srcind < srclentot); + assert (bufind < (size_t)srccount[n]); + srcdataptr[bufind] = srcptr[srcind]; + } + } + } + + } // for vari + + } else { + // Generic, unoptimised version + + int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+info[0].xpose].len; + int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+info[1].xpose].len; + int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+info[2].xpose].len; + + for (int vari=0; vari= info[c].src.local.off and + srcipos[c] < info[c].src.local.off + info[c].src.local.len); + assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off and + srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str); + assert ((srcipos[c] - allinfo[n*SLAB_MAXDIM+c].src.slab.off) % allinfo[n*SLAB_MAXDIM+c].src.slab.str == 0); + bufipos[d] = ipos[d]; + assert (bufipos[d] >= 0 and bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); + } + size_t srcind = 0; + size_t bufind = 0; + for (int d=SLAB_MAXDIM-1; d>=0; --d) { + int const c = info[d].xpose; + srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; + bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; + } + assert (srcind < srclentot); + assert (bufind < (size_t)srccount[n]); + memcpy (srcdataptr + vartypesize * bufind, + srcptr + vartypesize * srcind, + vartypesize); + } + } + } + + } // for vari + + } + } // for n + + ifcheck { + if (vartype == CCTK_VARIABLE_REAL) { + CCTK_REAL const * restrict const srcdataptr = + (CCTK_REAL const *)&srcdata.front(); + CCTK_REAL marker; + memset (&marker, POISON_VALUE, sizeof marker); + for (int i = 0; i < srcoffset[size]; ++i) { + assert (memcmp(&srcdataptr[i], &marker, sizeof marker) != 0); + } + } + } + CCTK_TimerStopI (timer_copy_in); + + + + CCTK_TimerStartI (timer_xfer); + ifdebug fflush (stdout); + if (not HAVE_MPI or use_alltoallv) { + MPI_Alltoallv + (&srcdata.front(), &srccount.front(), &srcoffset.front(), vardatatype, + &dstdata.front(), &dstcount.front(), &dstoffset.front(), vardatatype, + comm); + } else { + vector requests (2 * size); + // Start receive + for (int n = 0; n < size; ++n) { + if (n != rank and dstcount[n] > 0) { + MPI_Irecv + (&dstdata[vartypesize * dstoffset[n]], dstcount[n], vardatatype, + n, 0, comm, &requests[n]); + } else { + requests[n] = MPI_REQUEST_NULL; + } + } + // Start send + for (int n = 0; n < size; ++n) { + if (n != rank and srccount[n] > 0) { + MPI_Isend + (&srcdata[vartypesize * srcoffset[n]], srccount[n], vardatatype, + n, 0, comm, &requests[size + n]); + } else { + requests[size + n] = MPI_REQUEST_NULL; + } + } + // Self communication + { + int const n = rank; + assert (dstcount[n] == srccount[n]); + memcpy (&dstdata[vartypesize * dstoffset[n]], + &srcdata[vartypesize * srcoffset[n]], + dstcount[n] * vartypesize); + } + // Wait + MPI_Waitall (2 * size, &requests.front(), MPI_STATUSES_IGNORE); + } + + ifcheck { + if (vartype == CCTK_VARIABLE_REAL) { + for (int vari=0; vari=0 and dstindi=0 and dstindj=0 and dstindk=0 and dstindi=0 and dstindj=0 and dstindk=0 and dstindi=0 and dstindj=0 and dstindk=0 and dstindi=0 and dstindj=0 and dstindk= 0 and bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); + dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d]; + ifcheck assert (dstipos[d] >= info[d].dst.local.off and + dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); + ifcheck assert (dstipos[d] >= info[d].dst.slab.off and + dstipos[d] <= info[d].dst.slab.off + info[d].dst.slab.len - 1); + } + size_t bufind = 0; + size_t dstind = 0; + for (int d=SLAB_MAXDIM-1; d>=0; --d) { + bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; + dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; + } + ifcheck assert (bufind < (size_t)dstcount[n]); + ifcheck assert (dstind < dstlentot); + dstptr[dstind] = dstdataptr[bufind]; + } + } + } + + } // for vari + + } else { + // Generic, unoptimised version + + int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; + int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; + int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; + + for (int vari=0; vari= 0 and bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); + dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d] * info[d].dst.slab.str; + ifcheck assert (dstipos[d] >= info[d].dst.local.off and + dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); + ifcheck assert (dstipos[d] >= info[d].dst.slab.off and + dstipos[d] <= info[d].dst.slab.off + (info[d].dst.slab.len - 1) * info[d].dst.slab.str); + ifcheck assert ((dstipos[d] - info[d].dst.slab.off) % info[d].dst.slab.str == 0); + } + size_t bufind = 0; + size_t dstind = 0; + for (int d=SLAB_MAXDIM-1; d>=0; --d) { + bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; + dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; + } + ifcheck assert (bufind < (size_t)dstcount[n]); + ifcheck assert (dstind < dstlentot); + memcpy (dstptr + vartypesize * dstind, + dstdataptr + vartypesize * bufind, + vartypesize); + } + } + } + + } // for vari + + } + + } // for n + CCTK_TimerStopI (timer_copy_back); + + } // for vartypei + + + + ifcheck { + ifdebug fflush (stdout); + MPI_Barrier (comm); + } + + return 0; +} + + + +extern "C" +void CCTK_FCALL +CCTK_FNAME(Slab_Transfer) (int * restrict const ierr, + cGH const * const * restrict const cctkGH, + int const * restrict const dim, + int const * restrict const src_gsh, + int const * restrict const src_lbnd, + int const * restrict const src_lsh, + int const * restrict const src_lbbox, + int const * restrict const src_ubbox, + int const * restrict const src_nghostzones, + int const * restrict const src_off, + int const * restrict const src_str, + int const * restrict const src_len, + int const * restrict const dst_gsh, + int const * restrict const dst_lbnd, + int const * restrict const dst_lsh, + int const * restrict const dst_lbbox, + int const * restrict const dst_ubbox, + int const * restrict const dst_nghostzones, + int const * restrict const dst_off, + int const * restrict const dst_str, + int const * restrict const dst_len, + int const * restrict const xpose, + int const * restrict const flip, + int const * restrict const options, + int const * restrict const srctype, + void const * restrict const srcptr, + int const * restrict const dsttype, + void * restrict const dstptr) +{ + vector xferinfo (*dim); + + for (int d=0; d<*dim; ++d) { + xferinfo[d].src.gsh = src_gsh[d]; + xferinfo[d].src.lbnd = src_lbnd[d]; + xferinfo[d].src.lsh = src_lsh[d]; + xferinfo[d].src.lbbox = src_lbbox[d]; + xferinfo[d].src.ubbox = src_ubbox[d]; + xferinfo[d].src.nghostzones = src_nghostzones[d]; + xferinfo[d].src.off = src_off[d]; + xferinfo[d].src.str = src_str[d]; + xferinfo[d].src.len = src_len[d]; + + xferinfo[d].dst.gsh = dst_gsh[d]; + xferinfo[d].dst.lbnd = dst_lbnd[d]; + xferinfo[d].dst.lsh = dst_lsh[d]; + xferinfo[d].dst.lbbox = dst_lbbox[d]; + xferinfo[d].dst.ubbox = dst_ubbox[d]; + xferinfo[d].dst.nghostzones = dst_nghostzones[d]; + xferinfo[d].dst.off = dst_off[d]; + xferinfo[d].dst.str = dst_str[d]; + xferinfo[d].dst.len = dst_len[d]; + + xferinfo[d].xpose = xpose[d]; + xferinfo[d].flip = flip[d]; + } + + *ierr = Slab_Transfer (*cctkGH, *dim, &xferinfo.front(), *options, + *srctype, srcptr, *dsttype, dstptr); +} + + + +extern "C" +int +Slab_Transfer (cGH const * restrict const cctkGH, + int const dim, + xferinfo const * restrict const xferinfo, + int const options, + int const srctype, + void const * restrict const srcptr, + int const dsttype, + void * restrict const dstptr) +{ + int const nvars = 1; + int const srctypes[] = { srctype }; + void const * restrict const srcptrs[] = { srcptr }; + int const dsttypes[] = { dsttype }; + void * restrict const dstptrs[] = { dstptr }; + return Slab_MultiTransfer (cctkGH, dim, xferinfo, options, + nvars, srctypes, srcptrs, dsttypes, dstptrs); +} -- cgit v1.2.3