aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> 2006-08-04 14:39:58 +0000
committer schnetter <schnetter@2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8> 2006-08-04 14:39:58 +0000
commit 738e46497e17a6fe78b03e4c51fdd7eee4f185af (patch)
tree 069337253b79b82deeb09c752948a0fca93b69ec
parent dbe148103042e5637a3eab452c0a8e460b9f9299 (diff)
Introduce a new parameter "use_alltoallv" that decides whether to use
MPI_Alltoallv or to use MPI_Irecv, MPI_Isend, and MPI_Waitall to communicate. The latter is not tested, but may be faster if only a few of the processors are involved in the communication.

Disable some self tests: Allow processors that do not participate in the communication (i.e. that specify a zero slab size) to not know the geometry of the source and destination slabs.

git-svn-id: http://svn.cactuscode.org/arrangements/CactusNumerical/Slab/trunk@50 2e825fa2-fb71-486d-8b7f-a5ff3f0f6cb8
-rw-r--r-- param.ccl 4
-rw-r--r-- src/slab.c 116
2 files changed, 90 insertions, 30 deletions
diff --git a/param.ccl b/param.ccl
index 3be94cc..e3d2ae2 100644
--- a/param.ccl
+++ b/param.ccl
@@ -4,3 +4,7 @@
BOOLEAN timer_output "Print slabbing timings at shutdown time" STEERABLE=always
{
} "no"
+
+BOOLEAN use_alltoallv "Use MPI_Alltoallv for communication?" STEERABLE=always
+{
+} "yes"
diff --git a/src/slab.c b/src/slab.c
index 6bc418c..698bdac 100644
--- a/src/slab.c
+++ b/src/slab.c
@@ -4,6 +4,7 @@
TODO:
Provide facilities for dim > 3
Set up the slab exchange information in advance
+ Slab in several stages
Test slabbing without MPI
Allow using / not setting the ghost zones
Allow not using / not setting the boundaries
@@ -33,6 +34,7 @@
#include <string.h>
#include "cctk.h"
+#include "cctk_Parameters.h"
#include "cctk_DefineThorn.h"
#include "util_ErrorCodes.h"
#include "util_Table.h"
@@ -669,6 +671,8 @@ int Slab_MultiTransfer (cGH const * const cctkGH,
int const * const dsttypes,
void * const * const dstptrs)
{
+ DECLARE_CCTK_PARAMETERS;
+
struct info * restrict info;
size_t srclentot, dstlentot;
@@ -811,8 +815,11 @@ int Slab_MultiTransfer (cGH const * const cctkGH,
for (d=0; d<SLAB_MAXDIM; ++d) {
assert (iflag[d]);
}
+ /* Allow non-contributing processors to be non-knowledgeable */
for (d=0; d<SLAB_MAXDIM; ++d) {
- assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len);
+ if (info[info[d].xpose].src.slab.len && info[d].dst.slab.len > 0) {
+ assert (info[info[d].xpose].src.slab.len == info[d].dst.slab.len);
+ }
}
}
@@ -895,32 +902,41 @@ int Slab_MultiTransfer (cGH const * const cctkGH,
for (n = 0; n < size; ++n) {
for (d=0; d<SLAB_MAXDIM; ++d) {
- assert
- (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off);
- assert
- (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len);
- assert
- (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str);
- assert
- (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off);
- assert
- (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len);
- assert
- (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str);
- assert
- (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str);
- assert
- (allinfo[n*SLAB_MAXDIM+d].dst.local.str == info[d].dst.local.str);
- assert
- (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str);
- assert
- (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str);
- /* 2003-03-01 schnetter: I don't know why the following two
- should be necessary */
- assert
- (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str);
- assert
- (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str);
+ /* Allow non-contributing processors to be non-knowledgeable */
+ if (allinfo[n*SLAB_MAXDIM+d].src.slab.len > 0
+ && info[d].src.slab.len > 0)
+ {
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].src.global.off == info[d].src.global.off);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].src.global.len == info[d].src.global.len);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].src.global.str == info[d].src.global.str);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].src.local.str == info[d].src.local.str);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].src.active.str == info[d].src.active.str);
+ /* 2003-03-01 schnetter: I don't know why the following should
+ be necessary */
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].src.slab.str == info[d].src.slab.str);
+ }
+ if (allinfo[n*SLAB_MAXDIM+d].dst.slab.len > 0
+ && info[d].dst.slab.len > 0)
+ {
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].dst.global.off == info[d].dst.global.off);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].dst.global.len == info[d].dst.global.len);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].dst.global.str == info[d].dst.global.str);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].dst.local.str == info[d].dst.local.str);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].dst.active.str == info[d].dst.active.str);
+ assert
+ (allinfo[n*SLAB_MAXDIM+d].dst.slab.str == info[d].dst.slab.str);
+ }
assert (allinfo[n*SLAB_MAXDIM+d].xpose == info[d].xpose);
assert (allinfo[n*SLAB_MAXDIM+d].flip == info[d].flip);
}
@@ -1291,9 +1307,49 @@ int Slab_MultiTransfer (cGH const * const cctkGH,
CCTK_TimerStartI (timer_xfer);
ifdebug fflush (stdout);
- MPI_Alltoallv
- (srcdata, srccount, srcoffset, srcdatatype,
- dstdata, dstcount, dstoffset, dstdatatype, comm);
+ if (use_alltoallv) {
+ MPI_Alltoallv
+ (srcdata, srccount, srcoffset, srcdatatype,
+ dstdata, dstcount, dstoffset, dstdatatype, comm);
+ } else {
+ /* */
+ MPI_Request * requests = malloc (2 * size * sizeof * requests);
+ assert (requests);
+ /* Start receive */
+ for (n = 0; n < size; ++n) {
+ if (n != rank && dstcount[n] > 0) {
+ MPI_Irecv
+ ((char *)dstdata + dsttypesize * dstoffset[n],
+ dstcount[n], dstdatatype,
+ n, 0, comm, &requests[n]);
+ } else {
+ requests[n] = MPI_REQUEST_NULL;
+ }
+ }
+ /* Start send */
+ for (n = 0; n < size; ++n) {
+ if (n != rank && srccount[n] > 0) {
+ MPI_Isend
+ ((char *)srcdata + srctypesize * srcoffset[n],
+ srccount[n], srcdatatype,
+ n, 0, comm, &requests[size + n]);
+ } else {
+ requests[size + n] = MPI_REQUEST_NULL;
+ }
+ }
+ /* Self communication */
+ {
+ n = rank;
+ assert (dstcount[n] == srccount[n]);
+ memcpy ((char *)dstdata + dsttypesize * dstoffset[n],
+ (char *)srcdata + srctypesize * srcoffset[n],
+ dstcount[n] * dsttypesize);
+ }
+ /* Wait */
+ MPI_Waitall (2 * size, requests, MPI_STATUSES_IGNORE);
+ /* */
+ free (requests);
+ }
ifcheck {
if (dsttype == CCTK_VARIABLE_REAL) {