diff options
author | schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> | 2006-08-04 14:39:58 +0000 |
---|---|---|
committer | schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> | 2006-08-04 14:39:58 +0000 |
commit | 738e46497e17a6fe78b03e4c51fdd7eee4f185af (patch) | |
tree | 069337253b79b82deeb09c752948a0fca93b69ec | |
parent | dbe148103042e5637a3eab452c0a8e460b9f9299 (diff) |
Introduce a new parameter "use_alltoallv" that selects whether to
communicate with MPI_Alltoallv or with MPI_Irecv, MPI_Isend, and
MPI_Waitall. The latter path is untested, but may be faster if only a
few of the processors are involved in the communication.
Disable some self-tests: allow processors that do not participate in
the communication (i.e. that specify a zero slab size) not to know the
geometry of the source and destination slabs.
git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@50 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8
-rw-r--r-- | param.ccl | 4 | ||||
-rw-r--r-- | src/slab.c | 116 |
2 files changed, 90 insertions, 30 deletions
@@ -4,3 +4,7 @@ BOOLEAN timer_output "Print slabbing timings at shutdown time" STEERABLE=always { } "no" + +BOOLEAN use_alltoallv "Use MPI_Alltoallv for communication?" STEERABLE=always +{ +} "yes" @@ -4,6 +4,7 @@ TODO: Provide facilities for dim > 3 Set up the slab exchange information in advance + Slab in several stages Test slabbing without MPI Allow using / not setting the ghost zones Allow not using / not setting the boundaries @@ -33,6 +34,7 @@ #include <string.h> #include "cctk.h" +#include "cctk_Parameters.h" #include "cctk_DefineThorn.h" #include "util_ErrorCodes.h" #include "util_Table.h" @@ -669,6 +671,8 @@ int Slab_MultiTransfer (cGH const * const cctkGH, int const * const dsttypes, void * const * const dstptrs) { + DECLARE_CCTK_PARAMETERS; + struct info * restrict info; size_t srclentot, dstlentot; @@ -811,8 +815,11 @@ int Slab_MultiTransfer (cGH const * const cctkGH, for (d=0; d<SLAB_MAXDIM; ++d) { assert (iflag[d]); } + /* Allow non-contributing processors to be non-knowledgeable */ for (d=0; d<SLAB_MAXDIM; ++d) { - assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len); + if (info[info[d].xpose].src.slab.len && info[d].dst.slab.len > 0) { + assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len); + } } } @@ -895,32 +902,41 @@ int Slab_MultiTransfer (cGH const * const cctkGH, for (n = 0; n < size; ++n) { for (d=0; d<SLAB_MAXDIM; ++d) { - assert - (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off); - assert - (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len); - assert - (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str); - assert - (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str); - assert - 
(allinfo[n*SLAB_MAXDIM+d].dst.local.str == info[d].dst.local.str); - assert - (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str); - /* 2003-03-01 schnetter: I don't know why the following two - should be necessary */ - assert - (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str); - assert - (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str); + /* Allow non-contributing processors to be non-knowledgeable */ + if (allinfo[n*SLAB_MAXDIM+d].src.slab.len > 0 + && info[d].src.slab.len > 0) + { + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off); + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len); + assert + (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str); + assert + (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str); + assert + (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str); + /* 2003-03-01 schnetter: I don't know why the following should + be necessary */ + assert + (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str); + } + if (allinfo[n*SLAB_MAXDIM+d].dst.slab.len > 0 + && info[d].dst.slab.len > 0) + { + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.local.str == info[d].dst.local.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str); + assert + (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str); + } assert (allinfo[n*SLAB_MAXDIM+d].xpose == info[d].xpose); assert (allinfo[n*SLAB_MAXDIM+d].flip == info[d].flip); } @@ -1291,9 +1307,49 @@ int Slab_MultiTransfer (cGH const * const cctkGH, CCTK_TimerStartI (timer_xfer); ifdebug fflush (stdout); - MPI_Alltoallv - 
(srcdata, srccount, srcoffset, srcdatatype, - dstdata, dstcount, dstoffset, dstdatatype, comm); + if (use_alltoallv) { + MPI_Alltoallv + (srcdata, srccount, srcoffset, srcdatatype, + dstdata, dstcount, dstoffset, dstdatatype, comm); + } else { + /* */ + MPI_Request * requests = malloc (2 * size * sizeof * requests); + assert (requests); + /* Start receive */ + for (n = 0; n < size; ++n) { + if (n != rank && dstcount[n] > 0) { + MPI_Irecv + ((char *)dstdata + dsttypesize * dstoffset[n], + dstcount[n], dstdatatype, + n, 0, comm, &requests[n]); + } else { + requests[n] = MPI_REQUEST_NULL; + } + } + /* Start send */ + for (n = 0; n < size; ++n) { + if (n != rank && srccount[n] > 0) { + MPI_Isend + ((char *)srcdata + srctypesize * srcoffset[n], + srccount[n], srcdatatype, + n, 0, comm, &requests[size + n]); + } else { + requests[size + n] = MPI_REQUEST_NULL; + } + } + /* Self communication */ + { + n = rank; + assert (dstcount[n] == srccount[n]); + memcpy ((char *)dstdata + dsttypesize * dstoffset[n], + (char *)srcdata + srctypesize * srcoffset[n], + dstcount[n] * dsttypesize); + } + /* Wait */ + MPI_Waitall (2 * size, requests, MPI_STATUSES_IGNORE); + /* */ + free (requests); + } ifcheck { if (dsttype == CCTK_VARIABLE_REAL) { |