diff options
Diffstat (limited to 'src/slab.c')
-rw-r--r-- | src/slab.c | 1838 |
1 files changed, 0 insertions, 1838 deletions
diff --git a/src/slab.c b/src/slab.c deleted file mode 100644 index f06b0c8..0000000 --- a/src/slab.c +++ /dev/null @@ -1,1838 +0,0 @@ -/* $Header$ */ - -/* - TODO: - Provide facilities for dim > 3 - Set up the slab exchange information in advance - Slab in several stages - Test slabbing without MPI - Allow not setting the ghost zones - Allow not using / not setting the boundaries - Allow different numbers of ghost zones at the lower and upper boundary -*/ - - - -/* Print debug information? */ -#undef DEBUG - -/* Perform expensive self-checks? */ -#undef CHECK - -/* Omit all self-checks? (Overrides CHECK) */ -#undef NDEBUG -#define NDEBUG - -/* Byte value for poison checks: use 255 for nan, or e.g. 113 for a - large value */ -#define POISON_VALUE 254 - - - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "cctk.h" -#include "cctk_Arguments.h" -#include "cctk_Parameters.h" -#include "cctk_DefineThorn.h" -#include "util_ErrorCodes.h" -#include "util_Table.h" - -#ifdef CCTK_MPI -# include <mpi.h> -# define HAVE_MPI 1 -#else -# define HAVE_MPI 0 -#endif - -#include "slab.h" - - - -static const char * rcsid = "$Header$"; - -CCTK_FILEVERSION(TAT_Slab_slab_c); - - - -#ifdef DEBUG -# define ifdebug -#else -# define ifdebug while (0) -#endif - -#ifdef CHECK -# define ifcheck -#else -# define ifcheck while (0) -#endif - -#ifdef NDEBUG -# define check(x) ((x) ? 0 : CCTK_WARN (0, "internal error")) -#else -# define check(x) assert (x) -#endif - - - -static int timer_init = -1; -static int timer_copy_in = -1; -static int timer_xfer = -1; -static int timer_copy_back = -1; - -void -Slab_InitTimers (CCTK_ARGUMENTS) -{ - DECLARE_CCTK_ARGUMENTS; - timer_init = CCTK_TimerCreate ("Slab/init"); - timer_copy_in = CCTK_TimerCreate ("Slab/copy in"); - timer_xfer = CCTK_TimerCreate ("Slab/xfer"); - timer_copy_back = CCTK_TimerCreate ("Slab/copy back"); -} - -int -Slab_PrintTimers (void) -{ - CCTK_TimerPrintDataI (timer_init , -1); - CCTK_TimerPrintDataI (timer_copy_in , -1); - CCTK_TimerPrintDataI (timer_xfer , -1); - CCTK_TimerPrintDataI (timer_copy_back, -1); - return 0; -} - - - -/* Find out which driver to use */ -#ifdef CCTK_MPI -# if defined CARPET_CARPET -# include "Carpet/Carpet/src/carpet_public.h" -# endif -# if defined CACTUSPUGH_PUGH -# include "CactusPUGH/PUGH/src/include/pugh.h" -# endif -#endif - - - -#ifdef CCTK_MPI -/* Determine MPI type sizes */ - -# define CACTUS_MPI_BYTE MPI_CHAR - -# define CACTUS_MPI_INT1 MPI_CHAR - -# if SIZEOF_SHORT_INT == 2 -# define CACTUS_MPI_INT2 MPI_SHORT -# elif SIZEOF_INT == 2 -# define CACTUS_MPI_INT2 MPI_INT -# elif SIZEOF_LONG_INT == 2 -# define CACTUS_MPI_INT2 MPI_LONG -# elif SIZEOF_LONG_LONG == 2 -# define CACTUS_MPI_INT2 MPI_LONG_LONG_INT -# endif - -# if SIZEOF_SHORT_INT == 4 -# define CACTUS_MPI_INT4 MPI_SHORT -# elif SIZEOF_INT == 4 -# define CACTUS_MPI_INT4 MPI_INT -# elif SIZEOF_LONG_INT == 4 -# define CACTUS_MPI_INT4 MPI_LONG -# elif SIZEOF_LONG_LONG == 4 -# define CACTUS_MPI_INT4 MPI_LONG_LONG_INT -# endif - -# if SIZEOF_SHORT_INT == 8 -# define CACTUS_MPI_INT8 MPI_SHORT -# elif SIZEOF_INT == 8 -# define CACTUS_MPI_INT8 MPI_INT -# elif SIZEOF_LONG_INT == 8 -# define CACTUS_MPI_INT8 MPI_LONG -# elif SIZEOF_LONG_LONG == 8 -# define CACTUS_MPI_INT8 MPI_LONG_LONG_INT -# endif - -# if SIZEOF_FLOAT == 4 -# define CACTUS_MPI_REAL4 MPI_FLOAT -# elif SIZEOF_DOUBLE == 4 -# define CACTUS_MPI_REAL4 MPI_DOUBLE -# elif SIZEOF_LONG_DOUBLE == 4 -# define CACTUS_MPI_REAL4 MPI_LONG_DOUBLE -# endif - -# if SIZEOF_FLOAT == 8 -# define CACTUS_MPI_REAL8 MPI_FLOAT -# elif SIZEOF_DOUBLE == 8 -# define CACTUS_MPI_REAL8 MPI_DOUBLE -# elif SIZEOF_LONG_DOUBLE == 8 -# define CACTUS_MPI_REAL8 MPI_LONG_DOUBLE -# endif - -# if SIZEOF_FLOAT == 16 -# define CACTUS_MPI_REAL16 MPI_FLOAT -# elif SIZEOF_DOUBLE == 16 -# define CACTUS_MPI_REAL16 MPI_DOUBLE -# elif SIZEOF_LONG_DOUBLE == 16 -# define CACTUS_MPI_REAL16 MPI_LONG_DOUBLE -# endif - -static MPI_Datatype CACTUS_MPI_COMPLEX8; -static MPI_Datatype CACTUS_MPI_COMPLEX16; -static MPI_Datatype CACTUS_MPI_COMPLEX32; - -#endif - - - -/* Replace MPI functions if MPI is disabled */ -#ifndef CCTK_MPI - -typedef int MPI_Comm; - -typedef enum { - CACTUS_MPI_BYTE = CCTK_VARIABLE_BYTE, - CACTUS_MPI_INT = CCTK_VARIABLE_INT, - CACTUS_MPI_INT1 = CCTK_VARIABLE_INT1, - CACTUS_MPI_INT2 = CCTK_VARIABLE_INT2, - CACTUS_MPI_INT4 = CCTK_VARIABLE_INT4, - CACTUS_MPI_INT8 = CCTK_VARIABLE_INT8, - CACTUS_MPI_REAL = CCTK_VARIABLE_REAL, - CACTUS_MPI_REAL4 = CCTK_VARIABLE_REAL4, - CACTUS_MPI_REAL8 = CCTK_VARIABLE_REAL8, - CACTUS_MPI_REAL16 = CCTK_VARIABLE_REAL16, - CACTUS_MPI_COMPLEX = CCTK_VARIABLE_COMPLEX, - CACTUS_MPI_COMPLEX8 = CCTK_VARIABLE_COMPLEX8, - CACTUS_MPI_COMPLEX16 = CCTK_VARIABLE_COMPLEX16, - CACTUS_MPI_COMPLEX32 = CCTK_VARIABLE_COMPLEX32 -} MPI_Datatype; - -static MPI_Datatype MPI_INT; - -typedef enum { MPI_MIN, MPI_MAX } MPI_Op; - -static int -MPI_Barrier (MPI_Comm comm) -{ - return 0; -} - -static int -MPI_Comm_size (MPI_Comm comm, int * size) -{ - *size = 1; - return 0; -} - -static int -MPI_Comm_rank (MPI_Comm comm, int * rank) -{ - *rank = 0; - return 0; -} - -static int -MPI_Allgather (void * sendbuf, int sendcnt, int sendtype, - void * recvbuf, int recvcnt, int recvtype, - MPI_Comm comm) -{ - int recvsize; - assert (sendbuf); - assert (recvbuf); - assert (sendcnt == recvcnt); - assert (recvcnt >= 0); - assert (sendtype == recvtype); - recvsize = CCTK_VarTypeSize (recvtype); - assert (recvsize > 0); - memcpy (recvbuf, sendbuf, recvcnt * recvsize); - return 0; -} - -static int -MPI_Alltoall (void * sendbuf, int sendcnt, int sendtype, - void * recvbuf, int recvcnt, int recvtype, - MPI_Comm comm) -{ - int recvsize; - assert (sendbuf); - assert (recvbuf); - assert (sendcnt == recvcnt); - assert (recvcnt >= 0); - assert (sendtype == recvtype); - recvsize = CCTK_VarTypeSize (recvtype); - assert (recvsize > 0); - memcpy (recvbuf, sendbuf, recvcnt * recvsize); - return 0; -} - -static int -MPI_Alltoallv (void * sendbuf, int * sendcnt, int * sendoff, int sendtype, - void * recvbuf, int * recvcnt, int * recvoff, int recvtype, - MPI_Comm comm) -{ - int recvsize; - assert (sendbuf); - assert (recvbuf); - assert (sendcnt); - assert (recvcnt); - assert (*sendcnt == *recvcnt); - assert (*recvcnt >= 0); - assert (sendoff); - assert (recvoff); - assert (*sendoff == 0); - assert (*recvoff == 0); - assert (sendtype == recvtype); - recvsize = CCTK_VarTypeSize (recvtype); - assert (recvsize > 0); - memcpy (recvbuf, sendbuf, *recvcnt * recvsize); - return 0; -} - -static int -MPI_Allreduce (void * sendbuf, void * recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - int recvsize; - assert (sendbuf); - assert (recvbuf); - assert (count >= 0); - recvsize = CCTK_VarTypeSize (datatype); - assert (recvsize > 0); - memcpy (recvbuf, sendbuf, count * recvsize); - return 0; -} - -typedef int MPI_Request; /* dummy */ -typedef int MPI_Status; /* dummy */ -#define MPI_REQUEST_NULL 0 /* dummy */ -#define MPI_STATUSES_IGNORE 0 /* dummy */ - -static int -MPI_Irecv (void *buf, int count, MPI_Datatype datatype, - int source, int tag, MPI_Comm comm, MPI_Request *request) -{ - abort(); -} - -static int -MPI_Isend (void *buf, int count, MPI_Datatype datatype, int dest, - int tag, MPI_Comm comm, MPI_Request *request) -{ - abort(); -} - -static int -MPI_Waitall (int count, MPI_Request *array_of_requests, - MPI_Status *array_of_statuses) -{ - abort(); -} - -#endif - - - -/* Get the MPI COMM_WOLRD communicator from the driver */ -static MPI_Comm -get_mpi_comm (const cGH * restrict const cctkGH) -{ -#ifdef CCTK_MPI - if (CCTK_IsFunctionAliased ("GetMPICommWorld")) { - return * (MPI_Comm const *) GetMPICommWorld (cctkGH); - } -# if defined CARPET_CARPET - { - static int Carpet_active = -1; - if (Carpet_active == -1) Carpet_active = CCTK_IsThornActive ("Carpet"); - assert (Carpet_active >= 0); - if (Carpet_active) return CarpetMPIComm (); - } -# endif -# if defined CACTUSPUGH_PUGH - { - static int PUGH_active = -1; - if (PUGH_active == -1) PUGH_active = CCTK_IsThornActive ("PUGH"); - assert (PUGH_active >= 0); - if (PUGH_active) return PUGH_pGH(cctkGH)->PUGH_COMM_WORLD; - } -# endif - return MPI_COMM_WORLD; -#else - return 0; -#endif -} - - - -/* Initialise the MPI datatypes for complex variables */ -int -Slab_InitMPIDatatypes (void) -{ -#ifdef CCTK_MPI -# ifdef HAVE_CCTK_REAL4 - MPI_Type_contiguous (2, CACTUS_MPI_REAL4, &CACTUS_MPI_COMPLEX8); - MPI_Type_commit (&CACTUS_MPI_COMPLEX8); -# endif -# ifdef HAVE_CCTK_REAL8 - MPI_Type_contiguous (2, CACTUS_MPI_REAL8, &CACTUS_MPI_COMPLEX16); - MPI_Type_commit (&CACTUS_MPI_COMPLEX16); -# endif -# ifdef HAVE_CCTK_REAL16 - MPI_Type_contiguous (2, CACTUS_MPI_REAL16, &CACTUS_MPI_COMPLEX32); - MPI_Type_commit (&CACTUS_MPI_COMPLEX32); -# endif -#endif - -#ifndef CCTK_MPI - switch (sizeof(int)) { -#ifdef HAVE_CCTK_INT1 - case sizeof(CCTK_INT1): MPI_INT = CCTK_VARIABLE_INT1; break; -#endif -#ifdef HAVE_CCTK_INT2 - case sizeof(CCTK_INT2): MPI_INT = CCTK_VARIABLE_INT2; break; -#endif -#ifdef HAVE_CCTK_INT4 - case sizeof(CCTK_INT4): MPI_INT = CCTK_VARIABLE_INT4; break; -#endif -#ifdef HAVE_CCTK_INT8 - case sizeof(CCTK_INT8): MPI_INT = CCTK_VARIABLE_INT8; break; -#endif - default: assert(0); - } -#endif - - return 0; -} - - - -/* Normalise a Cactus datatype */ -static int -normal_type (int cactustype) -{ - switch (cactustype) { - case CCTK_VARIABLE_INT: -#ifdef CCTK_INTEGER_PRECISION_1 - return CCTK_VARIABLE_INT1; -#endif -#ifdef CCTK_INTEGER_PRECISION_2 - return CCTK_VARIABLE_INT2; -#endif -#ifdef CCTK_INTEGER_PRECISION_4 - return CCTK_VARIABLE_INT4; -#endif -#ifdef CCTK_INTEGER_PRECISION_8 - return CCTK_VARIABLE_INT8; -#endif - assert (0); - case CCTK_VARIABLE_REAL: -#ifdef CCTK_REAL_PRECISION_4 - return CCTK_VARIABLE_REAL4; -#endif -#ifdef CCTK_REAL_PRECISION_8 - return CCTK_VARIABLE_REAL8; -#endif -#ifdef CCTK_REAL_PRECISION_16 - return CCTK_VARIABLE_REAL16; -#endif - assert (0); - case CCTK_VARIABLE_COMPLEX: -#ifdef CCTK_REAL_PRECISION_4 - return CCTK_VARIABLE_COMPLEX8; -#endif -#ifdef CCTK_REAL_PRECISION_8 - return CCTK_VARIABLE_COMPLEX16; -#endif -#ifdef CCTK_REAL_PRECISION_16 - return CCTK_VARIABLE_COMPLEX32; -#endif - assert (0); - } - return cactustype; -} - - - -/* Find the MPI datatype corresponding to a Cactus datatype */ -static MPI_Datatype -mpi_type (int const cactustype) -{ - int const normaltype = normal_type (cactustype); - switch (normaltype) { - case CCTK_VARIABLE_BYTE: return CACTUS_MPI_BYTE; -#ifdef HAVE_CCTK_INT1 - case CCTK_VARIABLE_INT1: return CACTUS_MPI_INT1; -#endif -#ifdef HAVE_CCTK_INT2 - case CCTK_VARIABLE_INT2: return CACTUS_MPI_INT2; -#endif -#ifdef HAVE_CCTK_INT4 - case CCTK_VARIABLE_INT4: return CACTUS_MPI_INT4; -#endif -#ifdef HAVE_CCTK_INT8 - case CCTK_VARIABLE_INT8: return CACTUS_MPI_INT8; -#endif -#ifdef HAVE_CCTK_REAL4 - case CCTK_VARIABLE_REAL4: return CACTUS_MPI_REAL4; - case CCTK_VARIABLE_COMPLEX8: return CACTUS_MPI_COMPLEX8; -#endif -#ifdef HAVE_CCTK_REAL8 - case CCTK_VARIABLE_REAL8: return CACTUS_MPI_REAL8; - case CCTK_VARIABLE_COMPLEX16: return CACTUS_MPI_COMPLEX16; -#endif -#ifdef HAVE_CCTK_REAL16 - case CCTK_VARIABLE_REAL16: return CACTUS_MPI_REAL16; - case CCTK_VARIABLE_COMPLEX32: return CACTUS_MPI_COMPLEX32; -#endif - } - assert (0); - CCTK_WARN (0, "internal error"); - return CACTUS_MPI_BYTE; /* not reached */ -} - - - -/*****************************************************************************/ - - - -struct bbox { - int off; /* offset */ - int len; /* length */ - int str; /* stride */ -}; - -struct arrays { - struct bbox global; /* global region (all processors) */ - struct bbox local; /* this processor's region */ - struct bbox active; /* ??? */ - struct bbox slab; /* the slab that should be transferred */ -}; - -struct info { - struct arrays src; /* source region */ - struct arrays dst; /* destination region */ - int xpose; /* exchange axes */ - int flip; /* change directions of axes */ -}; - - - -static inline int -roundup (int const x, int const y) -{ - assert (x >= 0); - assert (y > 0); - return (x + y - 1) / y * y; -} - - - -static void -bbox_print (struct bbox const * restrict const bbox) -{ - assert (bbox); - printf - ("[%d:%d:%d]", - bbox->off, bbox->off + (bbox->len - 1) * bbox->str, bbox->str); -} - -static void -bbox_check (struct bbox const * restrict const bbox) -{ - assert (bbox); - assert (bbox->len >= 0); - assert (bbox->str > 0); -} - -static void -global2bbox (struct slabinfo const * restrict const slab, - struct bbox * restrict const bbox) -{ - assert (slab); - assert (bbox); - assert (slab->gsh >= 0); - bbox->off = 0; - bbox->len = slab->gsh; - bbox->str = 1; - bbox_check (bbox); -} - -static void -local2bbox (struct slabinfo const * restrict const slab, - struct bbox * restrict const bbox) -{ - assert (slab); - assert (bbox); - assert (slab->lbnd >= 0); - assert (slab->lsh >= 0); - assert (slab->lbnd + slab->lsh <= slab->gsh); - bbox->off = slab->lbnd; - bbox->len = slab->lsh; - bbox->str = 1; - bbox_check (bbox); -} - -static void -active2bbox (struct slabinfo const * restrict const slab, - struct bbox * restrict const bbox, - int const useghosts) -{ - int nlghostzones; - int nughostzones; - assert (slab); - assert (bbox); - assert (useghosts == 0 || useghosts == 1); - assert (slab->lbnd >= 0); - assert (slab->lsh >= 0); - assert (slab->lbnd + slab->lsh <= slab->gsh); - assert (slab->lbbox == 0 || slab->lbbox == 1); - assert (slab->ubbox == 0 || slab->ubbox == 1); - assert (slab->nghostzones >= 0); - nlghostzones = slab->lbbox || useghosts ? 0 : slab->nghostzones; - nughostzones = slab->ubbox || useghosts ? 0 : slab->nghostzones; - bbox->off = slab->lbnd + nlghostzones; - bbox->len = slab->lsh - nlghostzones - nughostzones; - bbox->str = 1; - bbox_check (bbox); -} - -static void -slab2bbox (struct slabinfo const * restrict const slab, - struct bbox * restrict const bbox) -{ - assert (slab); - assert (bbox); - bbox->off = slab->off; - bbox->len = slab->len; - bbox->str = slab->str; - bbox_check (bbox); -} - -static int -bbox_iscontained (struct bbox const * restrict const inner, - struct bbox const * restrict const outer) -{ - int inner_last; - int outer_last; - bbox_check (inner); - bbox_check (outer); - inner_last = inner->off + (inner->len - 1) * inner->str; - outer_last = outer->off + (outer->len - 1) * outer->str; - return inner->off >= outer->off && inner_last <= outer_last; -} - -static void -bbox_clip (struct bbox * restrict const inner, - struct bbox const * restrict const outer) -{ - int inner_last; - int outer_last; - bbox_check (inner); - bbox_check (outer); - inner_last = inner->off + (inner->len - 1) * inner->str; - outer_last = outer->off + (outer->len - 1) * outer->str; - if (inner->off < outer->off) { - inner->off += roundup (outer->off - inner->off, inner->str); - } - if (inner_last > outer_last) { - inner_last -= roundup (inner_last - outer_last, inner->str); - } - assert ((inner_last - inner->off) % inner->str == 0); - if (inner_last >= inner->off) { - inner->len = (inner_last - inner->off + inner->str) / inner->str; - } else { - inner->len = 0; - } - bbox_check (inner); -} - -static void -bbox_xform (struct bbox * restrict const ydst, - struct bbox const * restrict const ysrc, - struct bbox const * restrict const xdst, - struct bbox const * restrict const xsrc, - int const flip) -{ - int xsrc_last; - int xdst_last; - int ysrc_last; - int ydst_last; - assert (ydst); - bbox_check (ysrc); - bbox_check (xdst); - bbox_check (xsrc); - assert (ysrc->str == xsrc->str); - xsrc_last = xsrc->off + (xsrc->len - 1) * xsrc->str; - xdst_last = xdst->off + (xdst->len - 1) * xdst->str; - ysrc_last = ysrc->off + (ysrc->len - 1) * ysrc->str; - ydst->str = xdst->str; - assert ((ysrc->off - xsrc->off) % ysrc->str == 0); - ydst->off = xdst->off + (ysrc->off - xsrc->off) / ysrc->str * ydst->str; - ydst_last = xdst->off + (ysrc_last - xsrc->off) / ysrc->str * ydst->str; - if (flip) { - int const off = ydst->off; - int const last = ydst_last; - ydst->off = xdst->off + xdst_last - last; - ydst_last = xdst_last - (off - xdst->off); - } - assert ((ysrc_last - xsrc->off) % ysrc->str == 0); - assert (ydst_last - ydst->off + ydst->str >= 0); - ydst->len = (ydst_last - ydst->off + ydst->str) / ydst->str; - bbox_check (ydst); -} - - - -void -print_slabinfo (FILE * const out, - struct slabinfo const * restrict const slabinfo) -{ - fprintf (out, " gsh: %d\n", slabinfo->gsh); - fprintf (out, " lbnd: %d, lsh: %d\n", slabinfo->lbnd, slabinfo->lsh); - fprintf (out, " lbbox: %d, ubbox: %d, nghostzones: %d\n", - slabinfo->lbbox, slabinfo->ubbox, slabinfo->nghostzones); - fprintf (out, " off: %d, str: %d, len: %d\n", - slabinfo->off, slabinfo->str, slabinfo->len); -} - -void -print_xferinfo (FILE * const out, - struct xferinfo const * restrict const xferinfo) -{ - fprintf (out, " src:\n"); - print_slabinfo (out, & xferinfo->src); - fprintf (out, " dst:\n"); - print_slabinfo (out, & xferinfo->dst); - fprintf (out, " xpose: %d\n", xferinfo->xpose); - fprintf (out, " flip: %d\n", xferinfo->flip); -} - - - -int -Slab_MultiTransfer (cGH const * restrict const cctkGH, - int const dim, - struct xferinfo const * restrict const xferinfo, - int const options, - int const nvars, - int const * restrict const srctypes, - void const * restrict const * restrict const srcptrs, - int const * restrict const dsttypes, - void * restrict const * restrict const dstptrs) -{ - DECLARE_CCTK_PARAMETERS; - - struct info * restrict info; - size_t srclentot, dstlentot; - - int useghosts; - - struct info * restrict allinfo; - struct bbox * restrict srcdetail; - struct bbox * restrict dstdetail; - - int * restrict srcelems; - int * restrict srccount; - int * restrict srcoffset; - int * restrict dstelems; - int * restrict dstcount; - int * restrict dstoffset; - - int srctype; - int dsttype; - int srctypesize; - int dsttypesize; - void * restrict srcdata; - void * restrict dstdata; - - MPI_Comm comm; - int size, rank; - MPI_Datatype srcdatatype, dstdatatype; - - int var; - int i, j, k; - int n; - int d; - - /* Check arguments */ - check (cctkGH); - check (dim >= 0); - check (xferinfo); - check (nvars >= 0); - check (nvars==0 || srctypes); - for (var=0; var<nvars; ++var) check (srctypes[var] >= 0); - check (nvars==0 || srcptrs); -/* for (var=0; var<nvars; ++var) check (srcptrs[var]); */ - check (nvars==0 || dsttypes); - for (var=0; var<nvars; ++var) check (dsttypes[var] >= 0); - check (nvars==0 || dstptrs); -/* for (var=0; var<nvars; ++var) check (dstptrs[var]); */ - - if (nvars==0) return 0; - - CCTK_TimerStartI (timer_init); - - { - CCTK_INT tmp; - int const iret = Util_TableGetInt (options, &tmp, "useghosts"); - if (iret == 1) { - /* There was an entry, use it */ - useghosts = tmp; - } else if (iret == UTIL_ERROR_BAD_HANDLE - || iret == UTIL_ERROR_TABLE_NO_SUCH_KEY) { - /* There was no entry, use a default */ - useghosts = 0; - } else { - /* Something went wrong, abort */ - check (0); - } - } - - check (dim <= SLAB_MAXDIM); - info = malloc (SLAB_MAXDIM * sizeof *info); - check (info); - for (d=0; d<dim; ++d) { - global2bbox (&xferinfo[d].src, &info[d].src.global); - local2bbox (&xferinfo[d].src, &info[d].src.local); - active2bbox (&xferinfo[d].src, &info[d].src.active, useghosts); - slab2bbox (&xferinfo[d].src, &info[d].src.slab); - check (bbox_iscontained (&info[d].src.active, &info[d].src.local)); - check (bbox_iscontained (&info[d].src.local, &info[d].src.global)); - - global2bbox (&xferinfo[d].dst, &info[d].dst.global); - local2bbox (&xferinfo[d].dst, &info[d].dst.local); - active2bbox (&xferinfo[d].dst, &info[d].dst.active, 1); /* fill ghosts */ - slab2bbox (&xferinfo[d].dst, &info[d].dst.slab); - check (bbox_iscontained (&info[d].dst.active, &info[d].dst.local)); - check (bbox_iscontained (&info[d].dst.local, &info[d].dst.global)); - - info[d].xpose = xferinfo[d].xpose; - check (info[d].xpose >= 0 && info[d].xpose < dim); - info[d].flip = xferinfo[d].flip; - check (info[d].flip == 0 || info[d].flip == 1); - } - for (d=dim; d<SLAB_MAXDIM; ++d) { - static struct bbox const fake_bbox = { 0, 1, 1 }; - static struct arrays const fake_arrays - = { { 0, 1, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 0, 1, 1 } }; - - bbox_check (&fake_bbox); - - info[d].src = fake_arrays; - check (bbox_iscontained (&info[d].src.active, &info[d].src.local)); - check (bbox_iscontained (&info[d].src.local, &info[d].src.global)); - - info[d].dst = fake_arrays; - check (bbox_iscontained (&info[d].dst.active, &info[d].dst.local)); - check (bbox_iscontained (&info[d].dst.local, &info[d].dst.global)); - - info[d].xpose = d; - check (info[d].xpose >= 0 && info[d].xpose < SLAB_MAXDIM); - info[d].flip = 0; - check (info[d].flip == 0 || info[d].flip == 1); - } - - ifcheck { - ifdebug printf ("srcinfo:\n"); - for (d=0; d<SLAB_MAXDIM; ++d) { - printf (" src.global d=%d ", d); - bbox_print (&info[d].src.global); - printf ("\n"); - printf (" src.local d=%d ", d); - bbox_print (&info[d].src.local); - printf ("\n"); - printf (" src.active d=%d ", d); - bbox_print (&info[d].src.active); - printf ("\n"); - printf (" src.slab d=%d ", d); - bbox_print (&info[d].src.slab); - printf ("\n"); - } - ifdebug printf ("dstinfo:\n"); - for (d=0; d<SLAB_MAXDIM; ++d) { - printf (" dst.global d=%d ", d); - bbox_print (&info[d].dst.global); - printf ("\n"); - printf (" dst.local d=%d ", d); - bbox_print (&info[d].dst.local); - printf ("\n"); - printf (" dst.active d=%d ", d); - bbox_print (&info[d].dst.active); - printf ("\n"); - printf (" dst.slab d=%d ", d); - bbox_print (&info[d].dst.slab); - printf ("\n"); - } - ifdebug printf ("info:\n"); - for (d=0; d<SLAB_MAXDIM; ++d) { - printf (" xpose d=%d %d\n", d, info[d].xpose); - printf (" flip d=%d %d\n", d, info[d].flip); - } - } - - { - int iflag[SLAB_MAXDIM]; - for (d=0; d<SLAB_MAXDIM; ++d) { - iflag[d] = 0; - } - for (d=0; d<SLAB_MAXDIM; ++d) { - assert (! iflag[info[d].xpose]); - iflag[info[d].xpose] = 1; - } - for (d=0; d<SLAB_MAXDIM; ++d) { - assert (iflag[d]); - } - /* Allow non-contributing processors to be non-knowledgeable */ - for (d=0; d<SLAB_MAXDIM; ++d) { - if (info[info[d].xpose].src.slab.len && info[d].dst.slab.len > 0) { - assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len); - } - } - } - - srclentot = 1; - dstlentot = 1; - for (d=0; d<SLAB_MAXDIM; ++d) { - srclentot *= info[d].src.local.len; - dstlentot *= info[d].dst.local.len; - } - - /* Check arguments (continued) */ - for (var=0; var<nvars; ++var) if (srclentot > 0) assert (srcptrs[var]); - for (var=0; var<nvars; ++var) if (dstlentot > 0) assert (dstptrs[var]); - - - - { - CCTK_POINTER tmp; - int const iret = Util_TableGetPointer (options, &tmp, "comm"); - if (iret == 1) { - /* There was an entry, use it */ - comm = * (MPI_Comm const *) tmp; - } else if (iret == UTIL_ERROR_BAD_HANDLE - || iret == UTIL_ERROR_TABLE_NO_SUCH_KEY) { - /* There was no entry, use a default */ - comm = get_mpi_comm (cctkGH); - } else { - /* Something went wrong, abort */ - check (0); - } - } - - ifcheck { - ifdebug fflush (stdout); - MPI_Barrier (comm); - } - - MPI_Comm_size (comm, &size); - MPI_Comm_rank (comm, &rank); - - ifcheck { - static int count = 424242; - int mincount, maxcount; - ifdebug fflush (stdout); - MPI_Allreduce (&count, &mincount, 1, MPI_INT, MPI_MIN, comm); - MPI_Allreduce (&count, &maxcount, 1, MPI_INT, MPI_MAX, comm); - assert (mincount == count); - assert (maxcount == count); - ++ count; - } - - check (nvars >= 1); - srctype = srctypes[0]; - dsttype = dsttypes[0]; - for (var=0; var<nvars; ++var) { - check (srctypes[var] == srctype); - check (dsttypes[var] == dsttype); - } - check (srctype == dsttype); - srctypesize = CCTK_VarTypeSize (srctype); - check (srctypesize > 0); - dsttypesize = CCTK_VarTypeSize (dsttype); - check (dsttypesize > 0); - srcdatatype = mpi_type (srctype); - check (srcdatatype >= 0); - dstdatatype = mpi_type (dsttype); - check (dstdatatype >= 0); - - - - allinfo = malloc (size * SLAB_MAXDIM * sizeof *allinfo); - check (allinfo); - { - int const info_nints = sizeof(struct info) / sizeof(int); - ifdebug fflush (stdout); - MPI_Allgather - (info, SLAB_MAXDIM * info_nints, MPI_INT, - allinfo, SLAB_MAXDIM * info_nints, MPI_INT, comm); - } - - for (n = 0; n < size; ++n) { - for (d=0; d<SLAB_MAXDIM; ++d) { - /* Allow non-contributing processors to be non-knowledgeable */ - if (allinfo[n*SLAB_MAXDIM+d].src.slab.len > 0 - && info[d].src.slab.len > 0) - { - assert - (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off); - assert - (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len); - assert - (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str); - assert - (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str); - assert - (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str); - /* 2003-03-01 schnetter: I don't know why the following should - be necessary */ - assert - (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str); - } - if (allinfo[n*SLAB_MAXDIM+d].dst.slab.len > 0 - && info[d].dst.slab.len > 0) - { - assert - (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.local.str == info[d].dst.local.str); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str); - } - assert (allinfo[n*SLAB_MAXDIM+d].xpose == info[d].xpose); - assert (allinfo[n*SLAB_MAXDIM+d].flip == info[d].flip); - } - } - - - - srcdetail = malloc (size * SLAB_MAXDIM * sizeof *srcdetail); - check (srcdetail); - for (n = 0; n < size; ++n) { - ifdebug printf ("srcdetail n=%d:\n", n); - for (d=0; d<SLAB_MAXDIM; ++d) { - srcdetail[n*SLAB_MAXDIM+d] = allinfo[n*SLAB_MAXDIM+d].src.slab; - ifdebug printf (" src.slab d=%d ", d); - ifdebug bbox_print (&srcdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - bbox_clip (&srcdetail[n*SLAB_MAXDIM+d], &info[d].src.active); - ifdebug printf (" clipped with src.active d=%d ", d); - ifdebug bbox_print (&srcdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - } - for (d=0; d<SLAB_MAXDIM; ++d) { - struct bbox whereto; - struct bbox wherefrom; - whereto = allinfo[n*SLAB_MAXDIM+d].dst.slab; - ifdebug printf (" dst.slab d=%d ", info[d].xpose); - ifdebug bbox_print (&whereto); - ifdebug printf ("\n"); - bbox_clip (&whereto, &allinfo[n*SLAB_MAXDIM+d].dst.active); - ifdebug printf (" whereto d=%d ", info[d].xpose); - ifdebug bbox_print (&whereto); - ifdebug printf ("\n"); - bbox_xform - (&wherefrom, &whereto, - &allinfo[n*SLAB_MAXDIM+info[d].xpose].src.slab, - &allinfo[n*SLAB_MAXDIM+d].dst.slab, - info[d].flip); - ifdebug printf (" wherefrom d=%d ", info[d].xpose); - ifdebug bbox_print (&wherefrom); - ifdebug printf ("\n"); - bbox_clip (&srcdetail[n*SLAB_MAXDIM+info[d].xpose], &wherefrom); - ifdebug printf (" clipped with wherefrom d=%d ", info[d].xpose); - ifdebug bbox_print (&srcdetail[n*SLAB_MAXDIM+info[d].xpose]); - ifdebug printf ("\n"); - } - } - - srcelems = malloc (size * sizeof *srcelems); - check (srcelems); - srccount = malloc (size * sizeof *srccount); - check (srccount); - srcoffset = malloc ((size + 1) * sizeof *srcoffset); - check (srcoffset); - srcoffset[0] = 0; - for (n = 0; n < size; ++n) { - srcelems[n] = 1; - for (d=0; d<SLAB_MAXDIM; ++d) { - srcelems[n] *= srcdetail[n*SLAB_MAXDIM+d].len; - } - srccount[n] = nvars * srcelems[n]; - ifdebug printf - ("srccnt n=%d offset=%d count=%d\n", n, srcoffset[n], srccount[n]); - srcoffset[n+1] = srcoffset[n] + srccount[n]; - } - srcdata = malloc (srcoffset[size] * srctypesize); - check (srcoffset[size]==0 || srcdata); - ifcheck { - if (srctype == CCTK_VARIABLE_REAL) { - CCTK_REAL * restrict const srcdataptr = srcdata; - CCTK_REAL marker; - memset (&marker, POISON_VALUE, sizeof marker); - for (i = 0; i < srcoffset[size]; ++i) { - memcpy (&srcdataptr[i], &marker, sizeof marker); - } - } - } - - dstdetail = malloc (size * SLAB_MAXDIM * sizeof *dstdetail); - check (dstdetail); - for (n = 0; n < size; ++n) { - ifdebug printf ("dstdetail n=%d:\n", n); - for (d=0; d<SLAB_MAXDIM; ++d) { -/* dstdetail[n*SLAB_MAXDIM+d] = allinfo[n*SLAB_MAXDIM+d].dst.slab; */ - dstdetail[n*SLAB_MAXDIM+d] = info[d].dst.slab; - ifdebug printf (" dst.slab d=%d ", d); - ifdebug bbox_print (&dstdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - bbox_clip (&dstdetail[n*SLAB_MAXDIM+d], &info[d].dst.active); - ifdebug printf (" clipped with dst.active d=%d ", d); - ifdebug bbox_print (&dstdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - } - for (d=0; d<SLAB_MAXDIM; ++d) { - struct bbox wherefrom; - struct bbox whereto; -/* wherefrom = allinfo[n*SLAB_MAXDIM+info[d].xpose].src.slab; */ - wherefrom = info[info[d].xpose].src.slab; - ifdebug printf (" src.slab d=%d ", d); - ifdebug bbox_print (&dstdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - bbox_clip (&wherefrom, &allinfo[n*SLAB_MAXDIM+info[d].xpose].src.active); - ifdebug printf (" wherefrom d=%d ", d); - ifdebug bbox_print (&dstdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - bbox_xform - (&whereto, &wherefrom, - &allinfo[n*SLAB_MAXDIM+d].dst.slab, - &allinfo[n*SLAB_MAXDIM+info[d].xpose].src.slab, - info[d].flip); - ifdebug printf (" whereto d=%d ", d); - ifdebug bbox_print (&dstdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - bbox_clip (&dstdetail[n*SLAB_MAXDIM+d], &whereto); - ifdebug printf (" clipped with whereto d=%d ", d); - ifdebug bbox_print (&dstdetail[n*SLAB_MAXDIM+d]); - ifdebug printf ("\n"); - } - } - - dstelems = malloc (size * sizeof *dstelems); - check (dstelems); - dstcount = malloc (size * sizeof *dstcount); - check (dstcount); - dstoffset = malloc ((size + 1) * sizeof *dstoffset); - check (dstoffset); - dstoffset[0] = 0; - for (n = 0; n < size; ++n) { - dstelems[n] = 1; - for (d=0; d<SLAB_MAXDIM; ++d) { - dstelems[n] *= dstdetail[n*SLAB_MAXDIM+d].len; - } - dstcount[n] = nvars * dstelems[n]; - ifdebug printf - ("dstcnt n=%d offset=%d count=%d\n", n, dstoffset[n], dstcount[n]); - dstoffset[n+1] = dstoffset[n] + dstcount[n]; - } - dstdata = malloc (dstoffset[size] * dsttypesize); - check (dstoffset[size]==0 || dstdata); - ifcheck { - if (dsttype == CCTK_VARIABLE_REAL) { - CCTK_REAL * restrict const dstdataptr = dstdata; - CCTK_REAL marker; - memset (&marker, POISON_VALUE, sizeof marker); - for (i = 0; i < dstoffset[size]; ++i) { - memcpy (&dstdataptr[i], &marker, sizeof marker); - } - } - } - - check (srccount[rank] == dstcount[rank]); - - ifcheck { - int * restrict src2count; - int * restrict dst2count; - src2count = malloc (size * sizeof *src2count); - check (src2count); - dst2count = malloc (size * sizeof *dst2count); - check (dst2count); - ifdebug fflush (stdout); - MPI_Alltoall (srccount, 1, MPI_INT, src2count, 1, MPI_INT, comm); - MPI_Alltoall (dstcount, 1, MPI_INT, dst2count, 1, MPI_INT, comm); - for (n = 0; n < size; ++n) { - check (src2count[n] == dstcount[n]); - check (dst2count[n] == srccount[n]); - } - free (src2count); - free (dst2count); - } - - CCTK_TimerStopI (timer_init); - - - - CCTK_TimerStartI (timer_copy_in); - - for (n = 0; n < size; ++n) { - check (SLAB_MAXDIM == 3); - - if (info[0].xpose==0 && info[1].xpose==1 && info[2].xpose==2 - && srcdetail[n*SLAB_MAXDIM ].str==1 - && srcdetail[n*SLAB_MAXDIM+1].str==1 - && srcdetail[n*SLAB_MAXDIM+2].str==1 - && srctype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case: no transposing */ - - int const srcoffi = info[0].src.local.off; - int const srcoffj = info[1].src.local.off; - int const srcoffk = info[2].src.local.off; - - int const srcleni = info[0].src.local.len; - int const srclenj = info[1].src.local.len; - int const srclenk = info[2].src.local.len; - - int const srcdetailoffi = srcdetail[n*SLAB_MAXDIM+0].off; - int const srcdetailoffj = srcdetail[n*SLAB_MAXDIM+1].off; - int const srcdetailoffk = srcdetail[n*SLAB_MAXDIM+2].off; - - int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+0].len; - int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len; - int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len; - - if (n==0) assert (srcoffset[n]==0); - /* TODO: This does not take nvars into account */ -/* if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk); */ - - for (k = 0; k < srcdetaillenk; ++k) { - for (j = 0; j < srcdetaillenj; ++j) { - for (i = 0; i < srcdetailleni; ++i) { - int const srcindi = srcdetailoffi + i - srcoffi; - int const srcindj = srcdetailoffj + j - srcoffj; - int const srcindk = srcdetailoffk + k - srcoffk; - size_t const srcind - = srcindi + srcleni * (srcindj + srclenj * srcindk); - size_t const bufind = i + srcdetailleni * (j + srcdetaillenj * k); - ifcheck assert (srcindi>=0 && srcindi<srcleni); - ifcheck assert (srcindj>=0 && srcindj<srclenj); - ifcheck assert (srcindk>=0 && srcindk<srclenk); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)srcdata)[srcoffset[n] + var * srcelems[n] + bufind] - = ((const CCTK_REAL*)srcptrs[var])[srcind]; - } - } - } - } - - } else if (info[0].xpose==1 && info[1].xpose==0 && info[2].xpose==2 - && srcdetail[n*SLAB_MAXDIM ].str==1 - && srcdetail[n*SLAB_MAXDIM+1].str==1 - && srcdetail[n*SLAB_MAXDIM+2].str==1 - && srctype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case: transpose x and y */ - - int const srcoffi = info[0].src.local.off; - int const srcoffj = info[1].src.local.off; - int const srcoffk = info[2].src.local.off; - - int const srcleni = info[0].src.local.len; - int const srclenj = info[1].src.local.len; - int const srclenk = info[2].src.local.len; - - int const srcdetailoffi = srcdetail[n*SLAB_MAXDIM+0].off; - int const srcdetailoffj = srcdetail[n*SLAB_MAXDIM+1].off; - int const srcdetailoffk = srcdetail[n*SLAB_MAXDIM+2].off; - - int const srcdetailleni = srcdetail[n*SLAB_MAXDIM+0].len; - int const srcdetaillenj = srcdetail[n*SLAB_MAXDIM+1].len; - int const srcdetaillenk = srcdetail[n*SLAB_MAXDIM+2].len; - - if (n==0) assert (srcoffset[n]==0); - /* TODO: This does not take nvars into account */ -/* if (n<size-1) assert (srcoffset[n+1]==srcoffset[n]+srcdetailleni*srcdetaillenj*srcdetaillenk); */ - - for (k = 0; k < srcdetaillenk; ++k) { - for (j = 0; j < srcdetaillenj; ++j) { - for (i = 0; i < srcdetailleni; ++i) { - int const srcindi = srcdetailoffi + i - srcoffi; - int const srcindj = srcdetailoffj + j - srcoffj; - int const srcindk = srcdetailoffk + k - srcoffk; - size_t const srcind - = srcindi + srcleni * (srcindj + srclenj * srcindk); - size_t const bufind = j + srcdetaillenj * (i + srcdetailleni * k); - ifcheck assert (srcindi>=0 && srcindi<srcleni); - ifcheck assert (srcindj>=0 && srcindj<srclenj); - ifcheck assert (srcindk>=0 && srcindk<srclenk); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)srcdata)[srcoffset[n] + var * srcelems[n] + bufind] - = ((const CCTK_REAL*)srcptrs[var])[srcind]; - } - } - } - } - - } else if (srcdetail[n*SLAB_MAXDIM ].str==1 - && srcdetail[n*SLAB_MAXDIM+1].str==1 - && srcdetail[n*SLAB_MAXDIM+2].str==1 - && srctype == CCTK_VARIABLE_REAL) { - /* Optimised version for CCTK_REAL and stride 1 */ - - for (k = 0; k < srcdetail[n*SLAB_MAXDIM+info[2].xpose].len; ++k) { - for (j = 0; j < srcdetail[n*SLAB_MAXDIM+info[1].xpose].len; ++j) { - for (i = 0; i < srcdetail[n*SLAB_MAXDIM+info[0].xpose].len; ++i) { - int ipos[SLAB_MAXDIM]; - int srcipos[SLAB_MAXDIM]; - int bufipos[SLAB_MAXDIM]; - size_t srcind; - size_t bufind; - ipos[0] = i; - ipos[1] = j; - ipos[2] = k; - for (d=0; d<SLAB_MAXDIM; ++d) { - int const c = info[d].xpose; - srcipos[c] = srcdetail[n*SLAB_MAXDIM+c].off + ipos[d]; - assert (srcipos[c] >= info[c].src.local.off - && srcipos[c] < info[c].src.local.off + info[c].src.local.len); - assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off - && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1)); - bufipos[d] = ipos[d]; - assert (bufipos[d] >= 0 - && bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); - } - srcind = 0; - bufind = 0; - for (d=SLAB_MAXDIM-1; d>=0; --d) { - int const c = info[d].xpose; - srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; - bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; - } - assert (srcind < srclentot); - assert (bufind < (size_t)srccount[n]); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)srcdata)[srcoffset[n] + var * srcelems[n] + bufind] - = ((const CCTK_REAL*)srcptrs[var])[srcind]; - } - } - } - } - - } else { - /* Generic, unoptimised version */ - - for (k = 0; k < srcdetail[n*SLAB_MAXDIM+info[2].xpose].len; ++k) { - for (j = 0; j < srcdetail[n*SLAB_MAXDIM+info[1].xpose].len; ++j) { - for (i = 0; i < srcdetail[n*SLAB_MAXDIM+info[0].xpose].len; ++i) { - int ipos[SLAB_MAXDIM]; - int srcipos[SLAB_MAXDIM]; - int bufipos[SLAB_MAXDIM]; - size_t srcind; - size_t bufind; - ipos[0] = i; - ipos[1] = j; - ipos[2] = k; - for (d=0; d<SLAB_MAXDIM; ++d) { - int const c = info[d].xpose; - srcipos[c] = srcdetail[n*SLAB_MAXDIM+c].off + ipos[d] * srcdetail[n*SLAB_MAXDIM+c].str; - assert (srcipos[c] >= info[c].src.local.off - && srcipos[c] < info[c].src.local.off + info[c].src.local.len); - assert (srcipos[c] >= allinfo[n*SLAB_MAXDIM+c].src.slab.off - && srcipos[c] <= allinfo[n*SLAB_MAXDIM+c].src.slab.off + (allinfo[n*SLAB_MAXDIM+c].src.slab.len - 1) * allinfo[n*SLAB_MAXDIM+c].src.slab.str); - assert ((srcipos[c] - allinfo[n*SLAB_MAXDIM+c].src.slab.off) % allinfo[n*SLAB_MAXDIM+c].src.slab.str == 0); - bufipos[d] = ipos[d]; - assert (bufipos[d] >= 0 && bufipos[d] < srcdetail[n*SLAB_MAXDIM+c].len); - } - srcind = 0; - bufind = 0; - for (d=SLAB_MAXDIM-1; d>=0; --d) { - int const c = info[d].xpose; - srcind = srcind * info[d].src.local.len + srcipos[d] - info[d].src.local.off; - bufind = bufind * srcdetail[n*SLAB_MAXDIM+c].len + bufipos[d]; - } - assert (srcind < srclentot); - assert (bufind < (size_t)srccount[n]); - for (var=0; var<nvars; ++var) { - memcpy ((char *) srcdata + srctypesize * (srcoffset[n] + var * srcelems[n] + bufind), - (const char *) srcptrs[var] + srctypesize * srcind, - srctypesize); - } - } - } - } - - } - } /* for n */ - - ifcheck { - if (srctype == CCTK_VARIABLE_REAL) { - const CCTK_REAL * restrict const srcdataptr = srcdata; - CCTK_REAL marker; - memset (&marker, POISON_VALUE, sizeof marker); - for (i = 0; i < srcoffset[size]; ++i) { - assert (memcmp(&srcdataptr[i], &marker, sizeof marker) != 0); - } - } - } - CCTK_TimerStopI (timer_copy_in); - - - - CCTK_TimerStartI (timer_xfer); - ifdebug fflush (stdout); - if (! HAVE_MPI || use_alltoallv) { - MPI_Alltoallv - (srcdata, srccount, srcoffset, srcdatatype, - dstdata, dstcount, dstoffset, dstdatatype, comm); - } else { - /* */ - MPI_Request * requests = malloc (2 * size * sizeof * requests); - assert (requests); - /* Start receive */ - for (n = 0; n < size; ++n) { - if (n != rank && dstcount[n] > 0) { - MPI_Irecv - ((char *)dstdata + dsttypesize * dstoffset[n], - dstcount[n], dstdatatype, - n, 0, comm, &requests[n]); - } else { - requests[n] = MPI_REQUEST_NULL; - } - } - /* Start send */ - for (n = 0; n < size; ++n) { - if (n != rank && srccount[n] > 0) { - MPI_Isend - ((char *)srcdata + srctypesize * srcoffset[n], - srccount[n], srcdatatype, - n, 0, comm, &requests[size + n]); - } else { - requests[size + n] = MPI_REQUEST_NULL; - } - } - /* Self communication */ - { - n = rank; - assert (dstcount[n] == srccount[n]); - memcpy ((char *)dstdata + dsttypesize * dstoffset[n], - (char *)srcdata + srctypesize * srcoffset[n], - dstcount[n] * dsttypesize); - } - /* Wait */ - MPI_Waitall (2 * size, requests, MPI_STATUSES_IGNORE); - /* */ - free (requests); - } - - ifcheck { - if (dsttype == CCTK_VARIABLE_REAL) { - for (var=0; var<nvars; ++var) { - const CCTK_REAL * restrict const dstdataptr = dstdata; - CCTK_REAL marker; - memset (&marker, POISON_VALUE, sizeof marker); - for (i = 0; i < dstoffset[size]; ++i) { - assert (memcmp(&dstdataptr[i], &marker, sizeof marker) != 0); - } - } - } - } - CCTK_TimerStopI (timer_xfer); - - - - CCTK_TimerStartI (timer_copy_back); - for (n = 0; n < size; ++n) { - check (SLAB_MAXDIM == 3); - - if (info[0].flip==0 && info[1].flip==0 && info[2].flip==0 - && dstdetail[n*SLAB_MAXDIM ].str==1 - && dstdetail[n*SLAB_MAXDIM+1].str==1 - && dstdetail[n*SLAB_MAXDIM+2].str==1 - && dsttype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case: no flipping */ - - int const dstoffi = info[0].dst.local.off; - int const dstoffj = info[1].dst.local.off; - int const dstoffk = info[2].dst.local.off; - - int const dstleni = info[0].dst.local.len; - int const dstlenj = info[1].dst.local.len; - int const dstlenk = info[2].dst.local.len; - - int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off; - int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off; - int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off; - - int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; - int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; - int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; - - for (k = 0; k < dstdetaillenk; ++k) { - for (j = 0; j < dstdetaillenj; ++j) { - for (i = 0; i < dstdetailleni; ++i) { - size_t const bufind = i + dstdetailleni * (j + dstdetaillenj * k); - int const dstindi = dstdetailoffi + i - dstoffi; - int const dstindj = dstdetailoffj + j - dstoffj; - int const dstindk = dstdetailoffk + k - dstoffk; - size_t const dstind - = dstindi + dstleni * (dstindj + dstlenj * dstindk); - ifcheck assert (dstindi>=0 && dstindi<dstleni); - ifcheck assert (dstindj>=0 && dstindj<dstlenj); - ifcheck assert (dstindk>=0 && dstindk<dstlenk); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)dstptrs[var])[dstind] - = ((const CCTK_REAL*)dstdata)[dstoffset[n] + var * dstelems[n] + bufind]; - } - } - } - } - - } else if (info[0].flip==1 && info[1].flip==0 && info[2].flip==0 - && dstdetail[n*SLAB_MAXDIM ].str==1 - && dstdetail[n*SLAB_MAXDIM+1].str==1 - && dstdetail[n*SLAB_MAXDIM+2].str==1 - && dsttype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case: flip in x direction */ - - int const dstoffi = info[0].dst.local.off; - int const dstoffj = info[1].dst.local.off; - int const dstoffk = info[2].dst.local.off; - - int const dstleni = info[0].dst.local.len; - int const dstlenj = info[1].dst.local.len; - int const dstlenk = info[2].dst.local.len; - - int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off; - int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off; - int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off; - - int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; - int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; - int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; - - for (k = 0; k < dstdetaillenk; ++k) { - for (j = 0; j < dstdetaillenj; ++j) { - for (i = 0; i < dstdetailleni; ++i) { - size_t const bufind - = (dstdetailleni - 1 - i) + dstdetailleni * (j + dstdetaillenj * k); - int const dstindi = dstdetailoffi + i - dstoffi; - int const dstindj = dstdetailoffj + j - dstoffj; - int const dstindk = dstdetailoffk + k - dstoffk; - size_t const dstind - = dstindi + dstleni * (dstindj + dstlenj * dstindk); - ifcheck assert (dstindi>=0 && dstindi<dstleni); - ifcheck assert (dstindj>=0 && dstindj<dstlenj); - ifcheck assert (dstindk>=0 && dstindk<dstlenk); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)dstptrs[var])[dstind] - = ((const CCTK_REAL*)dstdata)[dstoffset[n] + var * dstelems[n] + bufind]; - } - } - } - } - - } else if (info[0].flip==0 && info[1].flip==1 && info[2].flip==0 - && dstdetail[n*SLAB_MAXDIM ].str==1 - && dstdetail[n*SLAB_MAXDIM+1].str==1 - && dstdetail[n*SLAB_MAXDIM+2].str==1 - && dsttype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case: flip in y direction */ - - int const dstoffi = info[0].dst.local.off; - int const dstoffj = info[1].dst.local.off; - int const dstoffk = info[2].dst.local.off; - - int const dstleni = info[0].dst.local.len; - int const dstlenj = info[1].dst.local.len; - int const dstlenk = info[2].dst.local.len; - - int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off; - int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off; - int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off; - - int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; - int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; - int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; - - for (k = 0; k < dstdetaillenk; ++k) { - for (j = 0; j < dstdetaillenj; ++j) { - for (i = 0; i < dstdetailleni; ++i) { - size_t const bufind - = i + dstdetailleni * ((dstdetaillenj - 1 - j) + dstdetaillenj * k); - int const dstindi = dstdetailoffi + i - dstoffi; - int const dstindj = dstdetailoffj + j - dstoffj; - int const dstindk = dstdetailoffk + k - dstoffk; - size_t const dstind - = dstindi + dstleni * (dstindj + dstlenj * dstindk); - ifcheck assert (dstindi>=0 && dstindi<dstleni); - ifcheck assert (dstindj>=0 && dstindj<dstlenj); - ifcheck assert (dstindk>=0 && dstindk<dstlenk); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)dstptrs[var])[dstind] - = ((const CCTK_REAL*)dstdata)[dstoffset[n] + var * dstelems[n] + bufind]; - } - } - } - } - - } else if (info[0].flip==1 && info[1].flip==1 && info[2].flip==0 - && dstdetail[n*SLAB_MAXDIM ].str==1 - && dstdetail[n*SLAB_MAXDIM+1].str==1 - && dstdetail[n*SLAB_MAXDIM+2].str==1 - && dsttype == CCTK_VARIABLE_REAL) { - /* Optimised version for a special case: flip in x and y direction */ - - int const dstoffi = info[0].dst.local.off; - int const dstoffj = info[1].dst.local.off; - int const dstoffk = info[2].dst.local.off; - - int const dstleni = info[0].dst.local.len; - int const dstlenj = info[1].dst.local.len; - int const dstlenk = info[2].dst.local.len; - - int const dstdetailoffi = dstdetail[n*SLAB_MAXDIM+0].off; - int const dstdetailoffj = dstdetail[n*SLAB_MAXDIM+1].off; - int const dstdetailoffk = dstdetail[n*SLAB_MAXDIM+2].off; - - int const dstdetailleni = dstdetail[n*SLAB_MAXDIM+0].len; - int const dstdetaillenj = dstdetail[n*SLAB_MAXDIM+1].len; - int const dstdetaillenk = dstdetail[n*SLAB_MAXDIM+2].len; - - for (k = 0; k < dstdetaillenk; ++k) { - for (j = 0; j < dstdetaillenj; ++j) { - for (i = 0; i < dstdetailleni; ++i) { - size_t const bufind - = (dstdetailleni - 1 - i) + dstdetailleni * ((dstdetaillenj - 1 - j) + dstdetaillenj * k); - int const dstindi = dstdetailoffi + i - dstoffi; - int const dstindj = dstdetailoffj + j - dstoffj; - int const dstindk = dstdetailoffk + k - dstoffk; - size_t const dstind - = dstindi + dstleni * (dstindj + dstlenj * dstindk); - ifcheck assert (dstindi>=0 && dstindi<dstleni); - ifcheck assert (dstindj>=0 && dstindj<dstlenj); - ifcheck assert (dstindk>=0 && dstindk<dstlenk); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)dstptrs[var])[dstind] - = ((const CCTK_REAL*)dstdata)[dstoffset[n] + var * dstelems[n] + bufind]; - } - } - } - } - - } else if (dstdetail[n*SLAB_MAXDIM ].str==1 - && dstdetail[n*SLAB_MAXDIM+1].str==1 - && dstdetail[n*SLAB_MAXDIM+2].str==1 - && dsttype == CCTK_VARIABLE_REAL) { - /* Optimised version for CCTK_REAL and stride 1 */ - - for (k = 0; k < dstdetail[n*SLAB_MAXDIM+2].len; ++k) { - for (j = 0; j < dstdetail[n*SLAB_MAXDIM+1].len; ++j) { - for (i = 0; i < dstdetail[n*SLAB_MAXDIM+0].len; ++i) { - int ipos[SLAB_MAXDIM]; - int bufipos[SLAB_MAXDIM]; - int dstipos[SLAB_MAXDIM]; - size_t bufind; - size_t dstind; - ipos[0] = i; - ipos[1] = j; - ipos[2] = k; - for (d=0; d<SLAB_MAXDIM; ++d) { - if (! info[d].flip) { - bufipos[d] = ipos[d]; - } else { - bufipos[d] = dstdetail[n*SLAB_MAXDIM+d].len - 1 - ipos[d]; - } - ifcheck assert (bufipos[d] >= 0 && bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); - dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d]; - ifcheck assert (dstipos[d] >= info[d].dst.local.off - && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); - ifcheck assert (dstipos[d] >= info[d].dst.slab.off - && dstipos[d] <= info[d].dst.slab.off + info[d].dst.slab.len - 1); - } - bufind = 0; - dstind = 0; - for (d=SLAB_MAXDIM-1; d>=0; --d) { - bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; - dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; - } - ifcheck assert (bufind < (size_t)dstcount[n]); - ifcheck assert (dstind < dstlentot); - for (var=0; var<nvars; ++var) { - ((CCTK_REAL*)dstptrs[var])[dstind] - = ((const CCTK_REAL*)dstdata)[dstoffset[n] + var * dstelems[n] + bufind]; - } - } - } - } - - } else { - /* Generic, unoptimised version */ - - for (k = 0; k < dstdetail[n*SLAB_MAXDIM+2].len; ++k) { - for (j = 0; j < dstdetail[n*SLAB_MAXDIM+1].len; ++j) { - for (i = 0; i < dstdetail[n*SLAB_MAXDIM+0].len; ++i) { - int ipos[SLAB_MAXDIM]; - int bufipos[SLAB_MAXDIM]; - int dstipos[SLAB_MAXDIM]; - size_t bufind; - size_t dstind; - ipos[0] = i; - ipos[1] = j; - ipos[2] = k; - for (d=0; d<SLAB_MAXDIM; ++d) { - if (! info[d].flip) { - bufipos[d] = ipos[d]; - } else { - bufipos[d] = dstdetail[n*SLAB_MAXDIM+d].len - 1 - ipos[d]; - } - ifcheck assert (bufipos[d] >= 0 && bufipos[d] < dstdetail[n*SLAB_MAXDIM+d].len); - dstipos[d] = dstdetail[n*SLAB_MAXDIM+d].off + ipos[d] * info[d].dst.slab.str; - ifcheck assert (dstipos[d] >= info[d].dst.local.off - && dstipos[d] < info[d].dst.local.off + info[d].dst.local.len); - ifcheck assert (dstipos[d] >= info[d].dst.slab.off - && dstipos[d] <= info[d].dst.slab.off + (info[d].dst.slab.len - 1) * info[d].dst.slab.str); - ifcheck assert ((dstipos[d] - info[d].dst.slab.off) % info[d].dst.slab.str == 0); - } - bufind = 0; - dstind = 0; - for (d=SLAB_MAXDIM-1; d>=0; --d) { - bufind = bufind * dstdetail[n*SLAB_MAXDIM+d].len + bufipos[d]; - dstind = dstind * info[d].dst.local.len + dstipos[d] - info[d].dst.local.off; - } - ifcheck assert (bufind < (size_t)dstcount[n]); - ifcheck assert (dstind < dstlentot); - for (var=0; var<nvars; ++var) { - memcpy ((char *) dstptrs[var] + dsttypesize * dstind, - (const char *) dstdata + dsttypesize * (dstoffset[n] + var * dstelems[n] + bufind), - dsttypesize); - } - } - } - } - - } - - } /* for n */ - CCTK_TimerStopI (timer_copy_back); - - - - free (dstdata); - free (dstelems); - free (dstcount); - free (dstoffset); - free (dstdetail); - - free (srcdata); - free (srcelems); - free (srccount); - free (srcoffset); - free (srcdetail); - - free (allinfo); - free (info); - - - - ifcheck { - ifdebug fflush (stdout); - MPI_Barrier (comm); - } - - return 0; -} - - - -void CCTK_FCALL -CCTK_FNAME(Slab_Transfer) (int * restrict const ierr, - cGH const * const * restrict const cctkGH, - int const * restrict const dim, - int const * restrict const src_gsh, - int const * restrict const src_lbnd, - int const * restrict const src_lsh, - int const * restrict const src_lbbox, - int const * restrict const src_ubbox, - int const * restrict const src_nghostzones, - int const * restrict const src_off, - int const * restrict const src_str, - int const * restrict const src_len, - int const * restrict const dst_gsh, - int const * restrict const dst_lbnd, - int const * restrict const dst_lsh, - int const * restrict const dst_lbbox, - int const * restrict const dst_ubbox, - int const * restrict const dst_nghostzones, - int const * restrict const dst_off, - int const * restrict const dst_str, - int const * restrict const dst_len, - int const * restrict const xpose, - int const * restrict const flip, - int const * restrict const options, - int const * restrict const srctype, - void const * restrict const srcptr, - int const * restrict const dsttype, - void * restrict const dstptr) -{ - struct xferinfo * restrict xferinfo; - int d; - - xferinfo = malloc (*dim * sizeof *xferinfo); - assert (xferinfo); - - for (d=0; d<*dim; ++d) { - xferinfo[d].src.gsh = src_gsh[d]; - xferinfo[d].src.lbnd = src_lbnd[d]; - xferinfo[d].src.lsh = src_lsh[d]; - xferinfo[d].src.lbbox = src_lbbox[d]; - xferinfo[d].src.ubbox = src_ubbox[d]; - xferinfo[d].src.nghostzones = src_nghostzones[d]; - xferinfo[d].src.off = src_off[d]; - xferinfo[d].src.str = src_str[d]; - xferinfo[d].src.len = src_len[d]; - - xferinfo[d].dst.gsh = dst_gsh[d]; - xferinfo[d].dst.lbnd = dst_lbnd[d]; - xferinfo[d].dst.lsh = dst_lsh[d]; - xferinfo[d].dst.lbbox = dst_lbbox[d]; - xferinfo[d].dst.ubbox = dst_ubbox[d]; - xferinfo[d].dst.nghostzones = dst_nghostzones[d]; - xferinfo[d].dst.off = dst_off[d]; - xferinfo[d].dst.str = dst_str[d]; - xferinfo[d].dst.len = dst_len[d]; - - xferinfo[d].xpose = xpose[d]; - xferinfo[d].flip = flip[d]; - } - - *ierr = Slab_Transfer (*cctkGH, *dim, xferinfo, *options, - *srctype, srcptr, *dsttype, dstptr); - - free (xferinfo); -} - - - -int -Slab_Transfer (cGH const * restrict const cctkGH, - int const dim, - struct xferinfo const * restrict const xferinfo, - int const options, - int const srctype, - void const * restrict const srcptr, - int const dsttype, - void * restrict const dstptr) -{ - int const nvars = 1; - int const srctypes[] = { srctype }; - void const * restrict const srcptrs[] = { srcptr }; - int const dsttypes[] = { dsttype }; - void * restrict const dstptrs[] = { dstptr }; - return Slab_MultiTransfer (cctkGH, dim, xferinfo, options, - nvars, srctypes, srcptrs, dsttypes, dstptrs); -} |