/* $Header$ */ /* TODO: Provide facilities for dim != 3 Set up the slab exchange information in advance Test slabbing without MPI Allow using / not setting the ghost zones Allow not using / not setting the boundaries */ #include #include #include #include #include "cctk.h" #include "cctk_DefineThorn.h" #ifdef CCTK_MPI # include #endif #include "slab.h" static const char * rcsid = "$Header$"; CCTK_FILEVERSION(TAT_Slab_slab_c); /* Print debug information? */ #undef DEBUG /* Perform expensive self-checks? */ #undef CHECK #ifdef DEBUG # define ifdebug #else # define ifdebug while (0) #endif #ifdef CHECK # define ifcheck #else # define ifcheck while (0) #endif /* Find out which driver to use */ #ifdef CCTK_MPI # if defined CARPET_CARPET # include "Carpet/Carpet/src/carpet_public.h" # elif defined CACTUSPUGH_PUGH # include "CactusPUGH/PUGH/src/include/pugh.h" # else # error "No supported driver thorn included" # endif #endif /* Replace MPI functions if MPI is disabled */ #ifndef CCTK_MPI typedef int MPI_Datatype; #define MPI_DOUBLE CCTK_VARIABLE_REAL8 #define MPI_INT CCTK_VARIABLE_INT4 typedef int MPI_Comm; static int MPI_Barrier (MPI_Comm comm) { return 0; } static int MPI_Comm_Size (MPI_Comm comm, int * size) { *size = 1; return 0; } static int MPI_Comm_Rank (MPI_Comm comm, int * rank) { *rank = 0; return 0; } static int MPI_Allgather (void * sendbuf, int sendcnt, int sendtype, void * recvbuf, int recvcnt, int recvtype, MPI_Comm comm) { int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt == recvcnt); assert (sendtype == recvtype); recvsize = CCTK_VarTypeSize (recvtype); assert (size > 0); memcpy (recvbuf, sendbuf, recvcnt * recvsize); return 0; } static int MPI_Alltoall (void * sendbuf, int sendcnt, int sendtype, void * recvbuf, int recvcnt, int recvtype, MPI_Comm comm) { int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt == recvcnt); assert (sendtype == recvtype); recvsize = CCTK_VarTypeSize (recvtype); assert (size > 0); memcpy (recvbuf, sendbuf, recvcnt * recvsize); return 0; } static int MPI_Alltoallv (void * sendbuf, int * sendcnt, int * sendoff, int sendtype, void * recvbuf, int * recvcnt, int * recvoff, int recvtype, MPI_Comm comm) { int recvsize; assert (sendbuf); assert (recvbuf); assert (sendcnt); assert (recvcnt); assert (*sendcnt == *recvcnt); assert (sendoff); assert (recvoff); assert (*sendoff == 0); assert (*recvoff == 0); assert (sendtype == recvtype); recvsize = CCTK_VarTypeSize (recvtype); assert (size > 0); memcpy (recvbuf, sendbuf, *recvcnt * recvsize); return 0; } #endif struct bbox { int off, len, str; }; struct arrays { struct bbox global, local, active, slab; }; struct info { struct arrays src, dst; int xpose; int flip; }; static inline int min (int const x, int const y) { return x < y ? x : y; } static inline int max (int const x, int const y) { return x > y ? x : y; } static inline int roundup (int const x, int const y) { assert (x >= 0); assert (y > 0); return (x + y - 1) / y * y; } static void bbox_print (struct bbox const * restrict const bbox) { assert (bbox); printf ("[%d:%d:%d]", bbox->off, bbox->off + (bbox->len - 1) * bbox->str, bbox->str); } static void bbox_check (struct bbox const * restrict const bbox) { assert (bbox); assert (bbox->len >= 0); assert (bbox->str > 0); } static void global2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox) { assert (slab); assert (bbox); assert (slab->gsh >= 0); bbox->off = 0; bbox->len = slab->gsh; bbox->str = 1; bbox_check (bbox); } static void local2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox) { assert (slab); assert (bbox); assert (slab->lbnd >= 0); assert (slab->lsh >= 0); assert (slab->lbnd + slab->lsh <= slab->gsh); bbox->off = slab->lbnd; bbox->len = slab->lsh; bbox->str = 1; bbox_check (bbox); } static void active2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox, int const useghosts) { int nlghostzones; int nughostzones; assert (slab); assert (bbox); assert (useghosts == 0 || useghosts == 1); assert (slab->lbnd >= 0); assert (slab->lsh >= 0); assert (slab->lbnd + slab->lsh <= slab->gsh); assert (slab->lbbox == 0 || slab->lbbox == 1); assert (slab->ubbox == 0 || slab->ubbox == 1); assert (slab->nghostzones >= 0); nlghostzones = slab->lbbox || useghosts ? 0 : slab->nghostzones; nughostzones = slab->ubbox || useghosts ? 0 : slab->nghostzones; bbox->off = slab->lbnd + nlghostzones; bbox->len = slab->lsh - nlghostzones - nughostzones; bbox->str = 1; bbox_check (bbox); } static void slab2bbox (struct slabinfo const * restrict const slab, struct bbox * restrict const bbox) { assert (slab); assert (bbox); bbox->off = slab->off; bbox->len = slab->len; bbox->str = slab->str; bbox_check (bbox); } static int bbox_iscontained (struct bbox const * restrict const inner, struct bbox const * restrict const outer) { int inner_last; int outer_last; bbox_check (inner); bbox_check (outer); inner_last = inner->off + (inner->len - 1) * inner->str; outer_last = outer->off + (outer->len - 1) * outer->str; return inner->off >= outer->off && inner_last <= outer_last; } static void bbox_clip (struct bbox * restrict const inner, struct bbox const * restrict const outer) { int inner_last; int outer_last; bbox_check (inner); bbox_check (outer); inner_last = inner->off + (inner->len - 1) * inner->str; outer_last = outer->off + (outer->len - 1) * outer->str; if (inner->off < outer->off) { inner->off += roundup (outer->off - inner->off, inner->str); } if (inner_last > outer_last) { inner_last -= roundup (inner_last - outer_last, inner->str); } assert ((inner_last - inner->off) % inner->str == 0); if (inner_last >= inner->off) { inner->len = (inner_last - inner->off + inner->str) / inner->str; } else { inner->len = 0; } bbox_check (inner); } static void bbox_xform (struct bbox * restrict const ydst, struct bbox const * restrict const ysrc, struct bbox const * restrict const xdst, struct bbox const * restrict const xsrc, int const flip) { int xsrc_last; int xdst_last; int ysrc_last; int ydst_last; assert (ydst); bbox_check (ysrc); bbox_check (xdst); bbox_check (xsrc); assert (ysrc->str == xsrc->str); xsrc_last = xsrc->off + (xsrc->len - 1) * xsrc->str; xdst_last = xdst->off + (xdst->len - 1) * xdst->str; ysrc_last = ysrc->off + (ysrc->len - 1) * ysrc->str; ydst->str = xdst->str; assert ((ysrc->off - xsrc->off) % ysrc->str == 0); ydst->off = xdst->off + (ysrc->off - xsrc->off) / ysrc->str * ydst->str; ydst_last = xdst->off + (ysrc_last - xsrc->off) / ysrc->str * ydst->str; if (flip) { int const off = ydst->off; int const last = ydst_last; ydst->off = xdst->off + xdst_last - last; ydst_last = xdst_last - (off - xdst->off); } assert ((ysrc_last - xsrc->off) % ysrc->str == 0); assert (ydst_last - ydst->off + ydst->str >= 0); ydst->len = (ydst_last - ydst->off + ydst->str) / ydst->str; bbox_check (ydst); } int Slab_Transfer (cGH * const cctkGH, int const dim, struct xferinfo const * const xferinfo, int const srctype, void const * const srcptr, int const dsttype, void * const dstptr) { struct info * restrict info; size_t srclentot, dstlentot; struct info * restrict allinfo; struct bbox * restrict srcdetail; struct bbox * restrict dstdetail; int * restrict srccount; int * restrict srcoffset; int * restrict dstcount; int * restrict dstoffset; void * restrict srcdata; void * restrict dstdata; MPI_Comm comm; int size, rank; MPI_Datatype srcdatatype, dstdatatype; int i, j, k; int n; int d; /* Check arguments */ assert (cctkGH); assert (dim >= 0); assert (xferinfo); assert (srctype == CCTK_VARIABLE_REAL); assert (srcptr); assert (dsttype == CCTK_VARIABLE_REAL); assert (dstptr); info = malloc (dim * sizeof *info); assert (info); for (d=0; d= 0 && info[d].xpose < dim); info[d].flip = xferinfo[d].flip; assert (info[d].flip == 0 || info[d].flip == 1); } { int iflag[dim]; for (d=0; dPUGH_COMM_WORLD; # else # error "No supported driver thorn included" # endif #else comm = 0; #endif ifcheck { ifdebug fflush (stdout); MPI_Barrier (comm); } MPI_Comm_size (comm, &size); MPI_Comm_rank (comm, &rank); assert (sizeof(CCTK_REAL) == sizeof(double)); srcdatatype = MPI_DOUBLE; dstdatatype = MPI_DOUBLE; allinfo = malloc (size * dim * sizeof *allinfo); assert (allinfo); { int const info_nints = sizeof(struct info) / sizeof(int); ifdebug fflush (stdout); MPI_Allgather (info, dim * info_nints, MPI_INT, allinfo, dim * info_nints, MPI_INT, comm); } for (n = 0; n < size; ++n) { for (d=0; d= info[c].src.local.off && srcipos[c] < info[c].src.local.off + info[c].src.local.len); if (! (srcipos[c] >= allinfo[n*dim+c].src.slab.off && srcipos[c] <= allinfo[n*dim+c].src.slab.off + (allinfo[n*dim+c].src.slab.len - 1) * allinfo[n*dim+c].src.slab.str)) { printf ("ssc n=%d ipos=[%d,%d,%d] d=%d srcipos=%d slab.off=%d slab.len=%d\n", n, i, j, k, d, srcipos[c], allinfo[n*dim+c].src.slab.off, allinfo[n*dim+c].src.slab.len); } assert (srcipos[c] >= allinfo[n*dim+c].src.slab.off && srcipos[c] <= allinfo[n*dim+c].src.slab.off + (allinfo[n*dim+c].src.slab.len - 1) * allinfo[n*dim+c].src.slab.str); assert ((srcipos[c] - allinfo[n*dim+c].src.slab.off) % allinfo[n*dim+c].src.slab.str == 0); bufipos[d] = ipos[d]; assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*dim+c].len); } srcind = 0; bufind = 0; for (d=dim-1; d>=0; --d) { int const c = info[d].xpose; srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; bufind = bufind * srcdetail[n*dim+c].len + bufipos[d]; } assert (srcind < srclentot); assert (bufind < srccount[n]); ((CCTK_REAL*)srcdata)[srcoffset[n] + bufind] = ((const CCTK_REAL*)srcptr)[srcind]; } } } } ifcheck { const CCTK_REAL * restrict const srcptr = srcdata; CCTK_REAL marker; memset (&marker, -1, sizeof marker); for (i = 0; i < srcoffset[size]; ++i) { assert (memcmp(&srcptr[i], &marker, sizeof marker) != 0); } } ifdebug fflush (stdout); MPI_Alltoallv (srcdata, srccount, srcoffset, srcdatatype, dstdata, dstcount, dstoffset, dstdatatype, comm); ifcheck { const CCTK_REAL * restrict const dstptr = dstdata; CCTK_REAL marker; memset (&marker, -1, sizeof marker); for (i = 0; i < dstoffset[size]; ++i) { assert (memcmp(&dstptr[i], &marker, sizeof marker) != 0); } } for (n = 0; n < size; ++n) { assert (dim == 3); for (k = 0; k < dstdetail[n*dim+2].len; ++k) { for (j = 0; j < dstdetail[n*dim+1].len; ++j) { for (i = 0; i < dstdetail[n*dim+0].len; ++i) { int ipos[3]; int bufipos[3]; int dstipos[3]; size_t bufind; size_t dstind; ipos[0] = i; ipos[1] = j; ipos[2] = k; for (d=0; d= 0 && bufipos[d] < dstdetail[n*dim+d].len); dstipos[d] = dstdetail[n*dim+d].off + ipos[d] * info[d].dst.slab.str; assert (dstipos[d] >= info[d].dst.local.off && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); assert (dstipos[d] >= info[d].dst.slab.off && dstipos[d] <= info[d].dst.slab.off + (info[d].dst.slab.len - 1) * info[d].dst.slab.str); assert ((dstipos[d] - info[d].dst.slab.off) % info[d].dst.slab.str == 0); } bufind = 0; dstind = 0; for (d=dim-1; d>=0; --d) { bufind = bufind * dstdetail[n*dim+d].len + bufipos[d]; dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; } assert (bufind < dstcount[n]); assert (dstind < dstlentot); ((CCTK_REAL*)dstptr)[dstind] = ((const CCTK_REAL*)dstdata)[dstoffset[n] + bufind]; } } } } free (dstdata); free (dstcount); free (dstoffset); free (dstdetail); free (srcdata); free (srccount); free (srcoffset); free (srcdetail); free (allinfo); free (info); ifcheck { ifdebug fflush (stdout); MPI_Barrier (comm); } return 0; }