/* $Header$ */ /* TODO: Provide facilities for dim > 3 Set up the slab exchange information in advance Slab in several stages Test slabbing without MPI Allow using / not setting the ghost zones Allow not using / not setting the boundaries Allow different numbers of ghost zones at the lower and upper boundary */ /* Print debug information? */ #undef DEBUG /* Perform expensive self-checks? */ #undef CHECK /* Omit all self-checks? (Overrides CHECK) */ #define NDEBUG /* Byte value for poison checks: use 255 for nan, or e.g. 113 for a large value */ #define POISON_VALUE 254 #include #include #include #include #include "cctk.h" #include "cctk_Parameters.h" #include "cctk_DefineThorn.h" #include "util_ErrorCodes.h" #include "util_Table.h" #ifdef CCTK_MPI # include #endif #include "slab.h" static const char * rcsid = "$Header$"; CCTK_FILEVERSION(TAT_Slab_slab_c); #ifdef DEBUG # define ifdebug #else # define ifdebug while (0) #endif #ifdef CHECK # define ifcheck #else # define ifcheck while (0) #endif #ifdef NDEBUG # define check(x) ((x) ? 0 : CCTK_WARN (0, "internal error")) #else # define check(x) assert (x) #endif static int timer_init; static int timer_copy_in; static int timer_xfer; static int timer_copy_back; int Slab_InitTimers (void) { timer_init = CCTK_TimerCreate ("Slab/init"); timer_copy_in = CCTK_TimerCreate ("Slab/copy in"); timer_xfer = CCTK_TimerCreate ("Slab/xfer"); timer_copy_back = CCTK_TimerCreate ("Slab/copy back"); return 0; } int Slab_PrintTimers (void) { CCTK_TimerPrintDataI (timer_init , -1); CCTK_TimerPrintDataI (timer_copy_in , -1); CCTK_TimerPrintDataI (timer_xfer , -1); CCTK_TimerPrintDataI (timer_copy_back, -1); return 0; } /* Find out which driver to use */ #ifdef CCTK_MPI # if defined CARPET_CARPET # include "Carpet/Carpet/src/carpet_public.h" # endif # if defined CACTUSPUGH_PUGH # include "CactusPUGH/PUGH/src/include/pugh.h" # endif #endif #ifdef CCTK_MPI /* Determine MPI type sizes */ # define CACTUS_MPI_BYTE MPI_CHAR # define CACTUS_MPI_INT1 MPI_CHAR # if SIZEOF_SHORT_INT == 2 # define CACTUS_MPI_INT2 MPI_SHORT # elif SIZEOF_INT == 2 # define CACTUS_MPI_INT2 MPI_INT # elif SIZEOF_LONG_INT == 2 # define CACTUS_MPI_INT2 MPI_LONG # elif SIZEOF_LONG_LONG == 2 # define CACTUS_MPI_INT2 MPI_LONG_LONG_INT # endif # if SIZEOF_SHORT_INT == 4 # define CACTUS_MPI_INT4 MPI_SHORT # elif SIZEOF_INT == 4 # define CACTUS_MPI_INT4 MPI_INT # elif SIZEOF_LONG_INT == 4 # define CACTUS_MPI_INT4 MPI_LONG # elif SIZEOF_LONG_LONG == 4 # define CACTUS_MPI_INT4 MPI_LONG_LONG_INT # endif # if SIZEOF_SHORT_INT == 8 # define CACTUS_MPI_INT8 MPI_SHORT # elif SIZEOF_INT == 8 # define CACTUS_MPI_INT8 MPI_INT # elif SIZEOF_LONG_INT == 8 # define CACTUS_MPI_INT8 MPI_LONG # elif SIZEOF_LONG_LONG == 8 # define CACTUS_MPI_INT8 MPI_LONG_LONG_INT # endif # if SIZEOF_FLOAT == 4 # define CACTUS_MPI_REAL4 MPI_FLOAT # elif SIZEOF_DOUBLE == 4 # define CACTUS_MPI_REAL4 MPI_DOUBLE # elif SIZEOF_LONG_DOUBLE == 4 # define CACTUS_MPI_REAL4 MPI_LONG_DOUBLE # endif # if SIZEOF_FLOAT == 8 # define CACTUS_MPI_REAL8 MPI_FLOAT # elif SIZEOF_DOUBLE == 8 # define CACTUS_MPI_REAL8 MPI_DOUBLE # elif SIZEOF_LONG_DOUBLE == 8 # define CACTUS_MPI_REAL8 MPI_LONG_DOUBLE # endif # if SIZEOF_FLOAT == 16 # define CACTUS_MPI_REAL16 MPI_FLOAT # elif SIZEOF_DOUBLE == 16 # define CACTUS_MPI_REAL16 MPI_DOUBLE # elif SIZEOF_LONG_DOUBLE == 16 # define CACTUS_MPI_REAL16 MPI_LONG_DOUBLE # endif static MPI_Datatype CACTUS_MPI_COMPLEX8; static MPI_Datatype CACTUS_MPI_COMPLEX16; static MPI_Datatype CACTUS_MPI_COMPLEX32; #endif /* Replace MPI functions if MPI is disabled */ #ifndef CCTK_MPI typedef int MPI_Comm; typedef enum { CACTUS_MPI_BYTE = CCTK_VARIABLE_BYTE, CACTUS_MPI_INT = CCTK_VARIABLE_INT, CACTUS_MPI_INT1 = CCTK_VARIABLE_INT1, CACTUS_MPI_INT2 = CCTK_VARIABLE_INT2, CACTUS_MPI_INT4 = CCTK_VARIABLE_INT4, CACTUS_MPI_INT8 = CCTK_VARIABLE_INT8, CACTUS_MPI_REAL = CCTK_VARIABLE_REAL, CACTUS_MPI_REAL4 = CCTK_VARIABLE_REAL4, CACTUS_MPI_REAL8 = CCTK_VARIABLE_REAL8, CACTUS_MPI_REAL16 = CCTK_VARIABLE_REAL16, CACTUS_MPI_COMPLEX = CCTK_VARIABLE_COMPLEX, CACTUS_MPI_COMPLEX8 = CCTK_VARIABLE_COMPLEX8, CACTUS_MPI_COMPLEX16 = CCTK_VARIABLE_COMPLEX16, CACTUS_MPI_COMPLEX32 = CCTK_VARIABLE_COMPLEX32 } MPI_Datatype; static MPI_Datatype MPI_INT; typedef enum { MPI_MIN, MPI_MAX } MPI_Op; static int MPI_Barrier (MPI_Comm comm) { return 0; } static int MPI_Comm_size (MPI_Comm comm, int * size) { *size = 1; return 0; } static int MPI_Comm_rank (MPI_Comm comm, int * rank) { *rank = 0; return 0; } static int MPI_Allgather (void * sendbuf, int sendcnt, int sendtype, void * recvbuf, int recvcnt, int recvtype, MPI_Comm comm) { int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt == recvcnt); assert (recvcnt >= 0); assert (sendtype == recvtype); recvsize = CCTK_VarTypeSize (recvtype); assert (recvsize > 0); memcpy (recvbuf, sendbuf, recvcnt * recvsize); return 0; } static int MPI_Alltoall (void * sendbuf, int sendcnt, int sendtype, void * recvbuf, int recvcnt, int recvtype, MPI_Comm comm) { int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt == recvcnt); assert (recvcnt >= 0); assert (sendtype == recvtype); recvsize = CCTK_VarTypeSize (recvtype); assert (recvsize > 0); memcpy (recvbuf, sendbuf, recvcnt * recvsize); return 0; } static int MPI_Alltoallv (void * sendbuf, int * sendcnt, int * sendoff, int sendtype, void * recvbuf, int * recvcnt, int * recvoff, int recvtype, MPI_Comm comm) { int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt); assert (recvcnt); assert (*sendcnt == *recvcnt); assert (*recvcnt >= 0); assert (sendoff); assert (recvoff); assert (*sendoff == 0); assert (*recvoff == 0); assert (sendtype == recvtype); recvsize = CCTK_VarTypeSize (recvtype); assert (recvsize > 0); memcpy (recvbuf, sendbuf, *recvcnt * recvsize); return 0; } static int MPI_Allreduce (void * sendbuf, void * recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int recvsize; assert (sendbuf); assert (recvbuf); assert (count >= 0); recvsize = CCTK_VarTypeSize (datatype); assert (recvsize > 0); memcpy (recvbuf, sendbuf, count * recvsize); return 0; } #endif /* Get the MPI COMM_WOLRD communicator from the driver */ static MPI_Comm get_mpi_comm (const cGH * restrict const cctkGH) { #ifdef CCTK_MPI if (CCTK_IsFunctionAliased ("GetMPICommWorld")) { return * (MPI_Comm const *) GetMPICommWorld (cctkGH); } # if defined CARPET_CARPET { static int Carpet_active = -1; if (Carpet_active == -1) Carpet_active = CCTK_IsThornActive ("Carpet"); assert (Carpet_active >= 0); if (Carpet_active) return CarpetMPIComm (); } # endif # if defined CACTUSPUGH_PUGH { static int PUGH_active = -1; if (PUGH_active == -1) PUGH_active = CCTK_IsThornActive ("PUGH"); assert (PUGH_active >= 0); if (PUGH_active) return PUGH_pGH(cctkGH)->PUGH_COMM_WORLD; } # endif return MPI_COMM_WORLD; #else return 0; #endif } /* Initialise the MPI datatypes for complex variables */ int Slab_InitMPIDatatypes (void) { #ifdef CCTK_MPI # ifdef HAVE_CCTK_REAL4 MPI_Type_contiguous (2, CACTUS_MPI_REAL4, &CACTUS_MPI_COMPLEX8); MPI_Type_commit (&CACTUS_MPI_COMPLEX8); # endif # ifdef HAVE_CCTK_REAL8 MPI_Type_contiguous (2, CACTUS_MPI_REAL8, &CACTUS_MPI_COMPLEX16); MPI_Type_commit (&CACTUS_MPI_COMPLEX16); # endif # ifdef HAVE_CCTK_REAL16 MPI_Type_contiguous (2, CACTUS_MPI_REAL16, &CACTUS_MPI_COMPLEX32); MPI_Type_commit (&CACTUS_MPI_COMPLEX32); # endif #endif #ifndef CCTK_MPI switch (sizeof(int)) { #ifdef HAVE_CCTK_INT1 case sizeof(CCTK_INT1): MPI_INT = CCTK_VARIABLE_INT1; break; #endif #ifdef HAVE_CCTK_INT2 case sizeof(CCTK_INT2): MPI_INT = CCTK_VARIABLE_INT2; break; #endif #ifdef HAVE_CCTK_INT4 case sizeof(CCTK_INT4): MPI_INT = CCTK_VARIABLE_INT4; break; #endif #ifdef HAVE_CCTK_INT8 case sizeof(CCTK_INT8): MPI_INT = CCTK_VARIABLE_INT8; break; #endif default: assert(0); } #endif return 0; } /* Normalise a Cactus datatype */ static int normal_type (int cactustype) { switch (cactustype) { case CCTK_VARIABLE_INT: #ifdef CCTK_INTEGER_PRECISION_1 return CCTK_VARIABLE_INT1; #endif #ifdef CCTK_INTEGER_PRECISION_2 return CCTK_VARIABLE_INT2; #endif #ifdef CCTK_INTEGER_PRECISION_4 return CCTK_VARIABLE_INT4; #endif #ifdef CCTK_INTEGER_PRECISION_8 return CCTK_VARIABLE_INT8; #endif assert (0); case CCTK_VARIABLE_REAL: #ifdef CCTK_REAL_PRECISION_4 return CCTK_VARIABLE_REAL4; #endif #ifdef CCTK_REAL_PRECISION_8 return CCTK_VARIABLE_REAL8; #endif #ifdef CCTK_REAL_PRECISION_16 return CCTK_VARIABLE_REAL16; #endif assert (0); case CCTK_VARIABLE_COMPLEX: #ifdef CCTK_REAL_PRECISION_4 return CCTK_VARIABLE_COMPLEX8; #endif #ifdef CCTK_REAL_PRECISION_8 return CCTK_VARIABLE_COMPLEX16; #endif #ifdef CCTK_REAL_PRECISION_16 return CCTK_VARIABLE_COMPLEX32; #endif assert (0); } return cactustype; } /* Find the MPI datatype corresponding to a Cactus datatype */ static MPI_Datatype mpi_type (int const cactustype) { int const normaltype = normal_type (cactustype); switch (normaltype) { case CCTK_VARIABLE_BYTE: return CACTUS_MPI_BYTE; #ifdef HAVE_CCTK_INT1 case CCTK_VARIABLE_INT1: return CACTUS_MPI_INT1; #endif #ifdef HAVE_CCTK_INT2 case CCTK_VARIABLE_INT2: return CACTUS_MPI_INT2; #endif #ifdef HAVE_CCTK_INT4 case CCTK_VARIABLE_INT4: return CACTUS_MPI_INT4; #endif #ifdef HAVE_CCTK_INT8 case CCTK_VARIABLE_INT8: return CACTUS_MPI_INT8; #endif #ifdef HAVE_CCTK_REAL4 case CCTK_VARIABLE_REAL4: return CACTUS_MPI_REAL4; case CCTK_VARIABLE_COMPLEX8: return CACTUS_MPI_COMPLEX8; #endif #ifdef HAVE_CCTK_REAL8 case CCTK_VARIABLE_REAL8: return CACTUS_MPI_REAL8; case CCTK_VARIABLE_COMPLEX16: return CACTUS_MPI_COMPLEX16; #endif #ifdef HAVE_CCTK_REAL16 case CCTK_VARIABLE_REAL16: return CACTUS_MPI_REAL16; case CCTK_VARIABLE_COMPLEX32: return CACTUS_MPI_COMPLEX32; #endif } assert (0); CCTK_WARN (0, "internal error"); return CACTUS_MPI_BYTE; /* not reached */ } /*****************************************************************************/ struct bbox { int off, len, str; }; struct arrays { struct bbox global, local, active, slab; }; struct info { struct arrays src, dst; int xpose; int flip; }; static inline int roundup (int const x, int const y) { assert (x >= 0); assert (y > 0); return (x + y - 1) / y * y; } static void bbox_print (struct bbox const * restrict const bbox) { assert (bbox); printf ("[%d:%d:%d]", bbox->off, bbox->off + (bbox->len - 1) * bbox->str, bbox->str); } static void bbox_check (struct bbox const * restrict const bbox) { assert (bbox); assert (bbox->len >= 0); assert (bbox->str > 0); } static void global2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox) { assert (slab); assert (bbox); assert (slab->gsh >= 0); bbox->off = 0; bbox->len = slab->gsh; bbox->str = 1; bbox_check (bbox); } static void local2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox) { assert (slab); assert (bbox); assert (slab->lbnd >= 0); assert (slab->lsh >= 0); assert (slab->lbnd + slab->lsh <= slab->gsh); bbox->off = slab->lbnd; bbox->len = slab->lsh; bbox->str = 1; bbox_check (bbox); } static void active2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox, int const useghosts) { int nlghostzones; int nughostzones; assert (slab); assert (bbox); assert (useghosts == 0 || useghosts == 1); assert (slab->lbnd >= 0); assert (slab->lsh >= 0); assert (slab->lbnd + slab->lsh <= slab->gsh); assert (slab->lbbox == 0 || slab->lbbox == 1); assert (slab->ubbox == 0 || slab->ubbox == 1); assert (slab->nghostzones >= 0); nlghostzones = slab->lbbox || useghosts ? 0 : slab->nghostzones; nughostzones = slab->ubbox || useghosts ? 0 : slab->nghostzones; bbox->off = slab->lbnd + nlghostzones; bbox->len = slab->lsh - nlghostzones - nughostzones; bbox->str = 1; bbox_check (bbox); } static void slab2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox) { assert (slab); assert (bbox); bbox->off = slab->off; bbox->len = slab->len; bbox->str = slab->str; bbox_check (bbox); } static int bbox_iscontained (struct bbox const * restrict const inner, struct bbox const * restrict const outer) { int inner_last; int outer_last; bbox_check (inner); bbox_check (outer); inner_last = inner->off + (inner->len - 1) * inner->str; outer_last = outer->off + (outer->len - 1) * outer->str; return inner->off >= outer->off && inner_last <= outer_last; } static void bbox_clip (struct bbox * restrict const inner, struct bbox const * restrict const outer) { int inner_last; int outer_last; bbox_check (inner); bbox_check (outer); inner_last = inner->off + (inner->len - 1) * inner->str; outer_last = outer->off + (outer->len - 1) * outer->str; if (inner->off < outer->off) { inner->off += roundup (outer->off - inner->off, inner->str); } if (inner_last > outer_last) { inner_last -= roundup (inner_last - outer_last, inner->str); } assert ((inner_last - inner->off) % inner->str == 0); if (inner_last >= inner->off) { inner->len = (inner_last - inner->off + inner->str) / inner->str; } else { inner->len = 0; } bbox_check (inner); } static void bbox_xform (struct bbox * restrict const ydst, struct bbox const * restrict const ysrc, struct bbox const * restrict const xdst, struct bbox const * restrict const xsrc, int const flip) { int xsrc_last; int xdst_last; int ysrc_last; int ydst_last; assert (ydst); bbox_check (ysrc); bbox_check (xdst); bbox_check (xsrc); assert (ysrc->str == xsrc->str); xsrc_last = xsrc->off + (xsrc->len - 1) * xsrc->str; xdst_last = xdst->off + (xdst->len - 1) * xdst->str; ysrc_last = ysrc->off + (ysrc->len - 1) * ysrc->str; ydst->str = xdst->str; assert ((ysrc->off - xsrc->off) % ysrc->str == 0); ydst->off = xdst->off + (ysrc->off - xsrc->off) / ysrc->str * ydst->str; ydst_last = xdst->off + (ysrc_last - xsrc->off) / ysrc->str * ydst->str; if (flip) { int const off = ydst->off; int const last = ydst_last; ydst->off = xdst->off + xdst_last - last; ydst_last = xdst_last - (off - xdst->off); } assert ((ysrc_last - xsrc->off) % ysrc->str == 0); assert (ydst_last - ydst->off + ydst->str >= 0); ydst->len = (ydst_last - ydst->off + ydst->str) / ydst->str; bbox_check (ydst); } void print_slabinfo (FILE * const out, struct slabinfo const * const slabinfo) { fprintf (out, " gsh: %d\n", slabinfo->gsh); fprintf (out, " lbnd: %d, lsh: %d\n", slabinfo->lbnd, slabinfo->lsh); fprintf (out, " lbbox: %d, ubbox: %d, nghostzones: %d\n", slabinfo->lbbox, slabinfo->ubbox, slabinfo->nghostzones); fprintf (out, " off: %d, str: %d, len: %d\n", slabinfo->off, slabinfo->str, slabinfo->len); } void print_xferinfo (FILE * const out, struct xferinfo const * const xferinfo) { fprintf (out, " src:\n"); print_slabinfo (out, & xferinfo->src); fprintf (out, " dst:\n"); print_slabinfo (out, & xferinfo->dst); fprintf (out, " xpose: %d\n", xferinfo->xpose); fprintf (out, " flip: %d\n", xferinfo->flip); } int Slab_MultiTransfer (cGH const * const cctkGH, int const dim, struct xferinfo const * const xferinfo, int const options, int const nvars, int const * const srctypes, void const * const * const srcptrs, int const * const dsttypes, void * const * const dstptrs) { DECLARE_CCTK_PARAMETERS; struct info * restrict info; size_t srclentot, dstlentot; struct info * restrict allinfo; struct bbox * restrict srcdetail; struct bbox * restrict dstdetail; int * restrict srcelems; int * restrict srccount; int * restrict srcoffset; int * restrict dstelems; int * restrict dstcount; int * restrict dstoffset; int srctype; int dsttype; int srctypesize; int dsttypesize; void * restrict srcdata; void * restrict dstdata; MPI_Comm comm; int size, rank; MPI_Datatype srcdatatype, dstdatatype; int var; int i, j, k; int n; int d; /* Check arguments */ check (cctkGH); check (dim >= 0); check (xferinfo); check (nvars >= 0); check (nvars==0 || srctypes); for (var=0; var= 0); check (nvars==0 || srcptrs); /* for (var=0; var= 0); check (nvars==0 || dstptrs); /* for (var=0; var= 0 && info[d].xpose < dim); info[d].flip = xferinfo[d].flip; check (info[d].flip == 0 || info[d].flip == 1); } for (d=dim; d= 0 && info[d].xpose < SLAB_MAXDIM); info[d].flip = 0; check (info[d].flip == 0 || info[d].flip == 1); } ifcheck { ifdebug printf ("srcinfo:\n"); for (d=0; d 0) { assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len); } } } srclentot = 1; dstlentot = 1; for (d=0; d 0) assert (srcptrs[var]); for (var=0; var 0) assert (dstptrs[var]); { CCTK_POINTER tmp; int const iret = Util_TableGetPointer (options, &tmp, "comm"); if (iret == 1) { /* There was an entry, use it */ comm = * (MPI_Comm const *) tmp; } else if (iret == UTIL_ERROR_BAD_HANDLE || iret == UTIL_ERROR_TABLE_NO_SUCH_KEY) { /* There was no entry, use a default */ comm = get_mpi_comm (cctkGH); } else { /* Something went wrong, abort */ check (0); } } ifcheck { ifdebug fflush (stdout); MPI_Barrier (comm); } MPI_Comm_size (comm, &size); MPI_Comm_rank (comm, &rank); ifcheck { static int count = 424242; int mincount, maxcount; ifdebug fflush (stdout); MPI_Allreduce (&count, &mincount, 1, MPI_INT, MPI_MIN, comm); MPI_Allreduce (&count, &maxcount, 1, MPI_INT, MPI_MAX, comm); assert (mincount == count); assert (maxcount == count); ++ count; } check (nvars >= 1); srctype = srctypes[0]; dsttype = dsttypes[0]; for (var=0; var 0); dsttypesize = CCTK_VarTypeSize (dsttype); check (dsttypesize > 0); srcdatatype = mpi_type (srctype); check (srcdatatype >= 0); dstdatatype = mpi_type (dsttype); check (dstdatatype >= 0); allinfo = malloc (size * SLAB_MAXDIM * sizeof *allinfo); check (allinfo); { int const info_nints = sizeof(struct info) / sizeof(int); ifdebug fflush (stdout); MPI_Allgather (info, SLAB_MAXDIM * info_nints, MPI_INT, allinfo, SLAB_MAXDIM * info_nints, MPI_INT, comm); } for (n = 0; n < size; ++n) { for (d=0; d 0 && info[d].src.slab.len > 0) { assert (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off); assert (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len); assert (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str); assert (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str); assert (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str); /* 2003-03-01 schnetter: I don't know why the following should be necessary */ assert (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str); } if (allinfo[n*SLAB_MAXDIM+d].dst.slab.len > 0 && info[d].dst.slab.len > 0) { assert (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off); assert (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len); assert (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str); assert (allinfo[n*SLAB_MAXDIM+d].dst.local.str == info[d].dst.local.str); assert (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str); assert (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str); } assert (allinfo[n*SLAB_MAXDIM+d].xpose == info[d].xpose); assert (allinfo[n*SLAB_MAXDIM+d].flip == info[d].flip); } } srcdetail = malloc (size * SLAB_MAXDIM * sizeof *srcdetail); check (srcdetail); for (n = 0; n < size; ++n) { ifdebug printf ("srcdetail n=%d:\n", n); for (d=0; d=0 && srcindi=0 && srcindj=0 && srcindk=0 && srcindi=0 && srcindj=0 && srcindk= info[c].src.local.off && srcipos[c] < info[c].src.local.off + info[c].src.local.len); assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1)); bufipos[d] = ipos[d]; assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); } srcind = 0; bufind = 0; for (d=SLAB_MAXDIM-1; d>=0; --d) { int const c = info[d].xpose; srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; } assert (srcind < srclentot); assert (bufind < (size_t)srccount[n]); for (var=0; var= info[c].src.local.off && srcipos[c] < info[c].src.local.off + info[c].src.local.len); assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str); assert ((srcipos[c] - allinfo[n*SLAB_MAXDIM+c].src.slab.off) % allinfo[n*SLAB_MAXDIM+c].src.slab.str == 0); bufipos[d] = ipos[d]; assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); } srcind = 0; bufind = 0; for (d=SLAB_MAXDIM-1; d>=0; --d) { int const c = info[d].xpose; srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; } assert (srcind < srclentot); assert (bufind < (size_t)srccount[n]); /* ((CCTK_REAL*)srcdata)[srcoffset[n] + bufind] */ /* = ((const CCTK_REAL*)srcptr)[srcind]; */ for (var=0; var 0) { MPI_Irecv ((char *)dstdata + dsttypesize * dstoffset[n], dstcount[n], dstdatatype, n, 0, comm, &requests[n]); } else { requests[n] = MPI_REQUEST_NULL; } } /* Start send */ for (n = 0; n < size; ++n) { if (n != rank && srccount[n] > 0) { MPI_Isend ((char *)srcdata + srctypesize * srcoffset[n], srccount[n], srcdatatype, n, 0, comm, &requests[size + n]); } else { requests[size + n] = MPI_REQUEST_NULL; } } /* Self communication */ { n = rank; assert (dstcount[n] == srccount[n]); memcpy ((char *)dstdata + dsttypesize * dstoffset[n], (char *)srcdata + srctypesize * srcoffset[n], dstcount[n] * dsttypesize); } /* Wait */ MPI_Waitall (2 * size, requests, MPI_STATUSES_IGNORE); /* */ free (requests); } ifcheck { if (dsttype == CCTK_VARIABLE_REAL) { for (var=0; var=0 && dstindi=0 && dstindj=0 && dstindk=0 && dstindi=0 && dstindj=0 && dstindk=0 && dstindi=0 && dstindj=0 && dstindk=0 && dstindi=0 && dstindj=0 && dstindk= 0 && bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d]; ifcheck assert (dstipos[d] >= info[d].dst.local.off && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); ifcheck assert (dstipos[d] >= info[d].dst.slab.off && dstipos[d] <= info[d].dst.slab.off + info[d].dst.slab.len - 1); } bufind = 0; dstind = 0; for (d=SLAB_MAXDIM-1; d>=0; --d) { bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; } ifcheck assert (bufind < (size_t)dstcount[n]); ifcheck assert (dstind < dstlentot); for (var=0; var= 0 && bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d] * info[d].dst.slab.str; ifcheck assert (dstipos[d] >= info[d].dst.local.off && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); ifcheck assert (dstipos[d] >= info[d].dst.slab.off && dstipos[d] <= info[d].dst.slab.off + (info[d].dst.slab.len - 1) * info[d].dst.slab.str); ifcheck assert ((dstipos[d] - info[d].dst.slab.off) % info[d].dst.slab.str == 0); } bufind = 0; dstind = 0; for (d=SLAB_MAXDIM-1; d>=0; --d) { bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; } ifcheck assert (bufind < (size_t)dstcount[n]); ifcheck assert (dstind < dstlentot); /* ((CCTK_REAL*)dstptr)[dstind] */ /* = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; */ for (var=0; var