From 738e46497e17a6fe78b03e4c51fdd7eee4f185af Mon Sep 17 00:00:00 2001 From: schnetter Date: Fri, 4 Aug 2006 14:39:58 +0000 Subject: Introduce a new parameter "use_alltoallv" that decides whether to use MPI_Alltoallv or to use MPI_Irecv, MPI_Isend, and MPI_Waitall to communicate. The latter is not tested, but may be faster if only a few of all processors are involved in the communication. Disable some self-tests: Allow processors that do not participate in the communication (those that specify a zero slab size) to not know the geometry of the source and destination slabs. git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@50 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8 --- param.ccl | 4 +++ src/slab.c | 116 +++++++++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 90 insertions(+), 30 deletions(-) diff --git a/param.ccl b/param.ccl index 3be94cc..e3d2ae2 100644 --- a/param.ccl +++ b/param.ccl @@ -4,3 +4,7 @@ BOOLEAN timer_output "Print slabbing timings at shutdown time" STEERABLE=always { } "no" + +BOOLEAN use_alltoallv "Use MPI_Alltoallv for communication?" 
STEERABLE=always +{ +} "yes" diff --git a/src/slab.c b/src/slab.c index 6bc418c..698bdac 100644 --- a/src/slab.c +++ b/src/slab.c @@ -4,6 +4,7 @@ TODO: Provide facilities for dim > 3 Set up the slab exchange information in advance + Slab in several stages Test slabbing without MPI Allow using / not setting the ghost zones Allow not using / not setting the boundaries @@ -33,6 +34,7 @@ #include #include "cctk.h" +#include "cctk_Parameters.h" #include "cctk_DefineThorn.h" #include "util_ErrorCodes.h" #include "util_Table.h" @@ -669,6 +671,8 @@ int Slab_MultiTransfer (cGH const * const cctkGH, int const * const dsttypes, void * const * const dstptrs) { + DECLARE_CCTK_PARAMETERS; + struct info * restrict info; size_t srclentot, dstlentot; @@ -811,8 +815,11 @@ int Slab_MultiTransfer (cGH const * const cctkGH, for (d=0; d 0) { + assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len); + } } } @@ -895,32 +902,41 @@ int Slab_MultiTransfer (cGH const * const cctkGH, for (n = 0; n < size; ++n) { for (d=0; d 0 + && info[d].src.slab.len > 0) + { + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off); + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len); + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str); + assert + (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str); + assert + (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str); + /* 2003-03-01 schnetter: I don't know why the following should + be necessary */ + assert + (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str); + } + if (allinfo[n*SLAB_MAXDIM+d].dst.slab.len > 0 + && info[d].dst.slab.len > 0) + { + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.local.str == 
info[d].dst.local.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str); + } assert (allinfo[n*SLAB_MAXDIM+d].xpose == info[d].xpose); assert (allinfo[n*SLAB_MAXDIM+d].flip == info[d].flip); } @@ -1291,9 +1307,49 @@ int Slab_MultiTransfer (cGH const * const cctkGH, CCTK_TimerStartI (timer_xfer); ifdebug fflush (stdout); - MPI_Alltoallv - (srcdata, srccount, srcoffset, srcdatatype, - dstdata, dstcount, dstoffset, dstdatatype, comm); + if (use_alltoallv) { + MPI_Alltoallv + (srcdata, srccount, srcoffset, srcdatatype, + dstdata, dstcount, dstoffset, dstdatatype, comm); + } else { + /* */ + MPI_Request * requests = malloc (2 * size * sizeof * requests); + assert (requests); + /* Start receive */ + for (n = 0; n < size; ++n) { + if (n != rank && dstcount[n] > 0) { + MPI_Irecv + ((char *)dstdata + dsttypesize * dstoffset[n], + dstcount[n], dstdatatype, + n, 0, comm, &requests[n]); + } else { + requests[n] = MPI_REQUEST_NULL; + } + } + /* Start send */ + for (n = 0; n < size; ++n) { + if (n != rank && srccount[n] > 0) { + MPI_Isend + ((char *)srcdata + srctypesize * srcoffset[n], + srccount[n], srcdatatype, + n, 0, comm, &requests[size + n]); + } else { + requests[size + n] = MPI_REQUEST_NULL; + } + } + /* Self communication */ + { + n = rank; + assert (dstcount[n] == srccount[n]); + memcpy ((char *)dstdata + dsttypesize * dstoffset[n], + (char *)srcdata + srctypesize * srcoffset[n], + dstcount[n] * dsttypesize); + } + /* Wait */ + MPI_Waitall (2 * size, requests, MPI_STATUSES_IGNORE); + /* */ + free (requests); + } ifcheck { if (dsttype == CCTK_VARIABLE_REAL) { -- cgit v1.2.3