aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorschnetter <schnetter@b61c5cb5-eaca-4651-9a7a-d64986f99364>2005-09-26 03:06:25 +0000
committerschnetter <schnetter@b61c5cb5-eaca-4651-9a7a-d64986f99364>2005-09-26 03:06:25 +0000
commit381b759f8afba04cf673cb0540c142f117b84913 (patch)
tree85739e0298e574a110af421f69c4a53d9529a98b
parent710e1e23cfecc96153ac103f13e04ceec625dd44 (diff)
Apply Frank Loeffler's patch to improve the processor distribution of
PUGH: Up to now, PUGH does not take the actual grid size into account while setting the number of processors/domains in each direction. This gives problems, e.g. using boxes like 200x7x7 (with 3 ghostzones). It also does not really do a good job in situations with domains that are not nearly cubic (e.g. in bitant mode it divides first in the z-direction, which is usually the smallest). I inserted another decomposition algorithm which looks for the longest direction and divides this if possible. Because it was written to be easy to understand, it can fail in certain situations, in which case it gracefully falls back to the old decomposition (e.g. a cubic box and 9 processors). To include this algorithm I added a keyword "automatic_old" to PUGH::processor_topology. The new behaviour is the default; this keyword re-enables the old behaviour. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGH/trunk@471 b61c5cb5-eaca-4651-9a7a-d64986f99364
-rw-r--r--doc/documentation.tex9
-rw-r--r--param.ccl5
-rw-r--r--src/SetupGroup.c3
-rw-r--r--src/SetupPGH.c1
-rw-r--r--src/SetupPGV.c105
-rw-r--r--src/include/pughi.h4
6 files changed, 123 insertions, 4 deletions
diff --git a/doc/documentation.tex b/doc/documentation.tex
index 4f9abac..53aed8d 100644
--- a/doc/documentation.tex
+++ b/doc/documentation.tex
@@ -166,6 +166,15 @@ the partition parameters) will apply the automatic distribution. That's why
it is not necessary to set \verb|PUGH::partition_3d_y = "30"| or
\verb|PUGH::partition_3d_z = "15 15"| in the parameter file.
+Because the previous automatic distribution gave problems in some
+cases (e.g.\ a box that is very long in one direction but short in the
+others), there is now an improved algorithm that tries to do a better job
+of decomposing the grid evenly across the processors. However, it can fail
+in certain situations, in which case it gracefully falls back to the
+previous method (\verb|"automatic_old"|) and gives a warning. Note that, if one
+or more of the parameters \verb|PUGH::processor_topology_3d_*| or
+\verb|PUGH::partition_3d_*| are set, this mode automatically falls back to
+\verb|"automatic_old"| without warning.
\section{Understanding PUGH Output}
diff --git a/param.ccl b/param.ccl
index 7aea25b..e15a570 100644
--- a/param.ccl
+++ b/param.ccl
@@ -119,8 +119,9 @@ BOOLEAN enable_all_storage "Enable storage for all GFs?"
KEYWORD processor_topology "How to determine the processor topology" STEERABLE = RECOVER
{
- "manual" :: "Specified by proc_top_nx etc"
- "automatic" :: "Automatically generated"
+ "manual" :: "Specified by proc_top_nx etc"
+ "automatic" :: "Automatically generated"
+ "automatic_old" :: "Automatically generated (old method)"
} "automatic"
INT processor_topology_1d_x "No of Procs in X direction" STEERABLE = RECOVER
diff --git a/src/SetupGroup.c b/src/SetupGroup.c
index fa4a922..76618fd 100644
--- a/src/SetupGroup.c
+++ b/src/SetupGroup.c
@@ -155,7 +155,8 @@ static int PUGH_SetupGAGroup (pGH *newGH,
}
}
- connectivity = PUGH_SetupConnectivity (dim, newGH->nprocs, nprocs, perme);
+ connectivity = PUGH_SetupConnectivity (dim, newGH->nprocs, nsize,
+ ghostsize, nprocs, perme);
extras = PUGH_SetupPGExtras (0, dim, perme, staggercode, nsize, ghostsize,
newGH->nprocs, connectivity->nprocs,
diff --git a/src/SetupPGH.c b/src/SetupPGH.c
index 8b56808..ab71fb4 100644
--- a/src/SetupPGH.c
+++ b/src/SetupPGH.c
@@ -144,6 +144,7 @@ pGH *PUGH_SetupPGH (void *callerid,
}
pughGH->Connectivity[idim-1] = PUGH_SetupConnectivity (idim, pughGH->nprocs,
+ nsize, nghostzones,
nprocs, perme);
free(nprocs);
diff --git a/src/SetupPGV.c b/src/SetupPGV.c
index b01403b..93a2d56 100644
--- a/src/SetupPGV.c
+++ b/src/SetupPGV.c
@@ -180,9 +180,12 @@ void PUGH_DestroyPGExtras(pGExtras **PGExtras)
@@*/
pConnectivity *PUGH_SetupConnectivity(int dim,
int total_procs,
+ const int *nsize,
+ const int *nghostzones,
int *nprocs,
int *perme)
{
+ DECLARE_CCTK_PARAMETERS
pConnectivity *this;
int i;
@@ -238,7 +241,15 @@ pConnectivity *PUGH_SetupConnectivity(int dim,
this->neighbours[i] = this->neighbours[0]+(2*dim*i);
}
- PUGH_GenerateTopology(dim, total_procs, this->nprocs);
+ if (CCTK_EQUALS(processor_topology, "automatic"))
+ {
+ PUGH_GenerateAlternativeTopology(dim, total_procs, nsize, nghostzones,
+ this->nprocs);
+ }
+ else
+ {
+ PUGH_GenerateTopology(dim, total_procs, this->nprocs);
+ }
PUGH_GenerateNeighbours(dim, total_procs, this->nprocs, this->neighbours,
this->perme);
}
@@ -382,6 +393,98 @@ int PUGH_GenerateTopology(int dim, int total_procs, int *nprocs)
return retval;
}
+
+
+ /*@@
+ @routine PUGH_GenerateAlternativeTopology
+ @date Thu Feb 02 17:39:21 2005
+ @author Frank Loeffler
+ @desc
+ Generate the appropriate processor topology for this processor
+ decomposition.
+ This routine tries to decompose using the information about the actual
+ grid size. It can fail in certain situations in which it falls back
+ gracefully to the traditional decomposition giving a warning.
+ It also does not support manually set topologies and falls back in this
+ case.
+ @enddesc
+ @history
+ @endhistory
+@@*/
+int PUGH_GenerateAlternativeTopology(int dim,
+ int total_procs, const int *nsize,
+ const int *nghostzones, int *nprocs)
+{
+ int i;
+ int max_dir, max_length;
+ int free_procs = total_procs-1; /* processors not yet assigned; one is taken by the initial single domain */
+ int used_procs = 1;
+
+
+ /* Nothing to decompose here */
+ if (dim == 0)
+ {
+ return PUGH_GenerateTopology(dim, total_procs, nprocs);
+ }
+ /* If any nprocs[i] is already non-zero or any nsize[i] is smaller than 1,
+ fall back to the old method */
+ for (i = 0; i < dim; i++)
+ {
+ if (nprocs[i] || (nsize[i]<1))
+ {
+ return PUGH_GenerateTopology(dim, total_procs, nprocs);
+ }
+ }
+ /* start with a single domain */
+ for (i = 0; i < dim; i++)
+ {
+ nprocs[i] = 1;
+ }
+ /* divide as long as there are processors left */
+ while (free_procs)
+ {
+ used_procs = total_procs - free_procs;
+ /* find the allowed direction with the largest nsize[i] / nprocs[i] */
+ max_dir = -1;
+ max_length = 0;
+ for (i = 0; i < dim; i++)
+ {
+ /* is one part larger than the max? (then it might be a new max) */
+ if ((nsize[i] / nprocs[i] > max_length) &&
+ /* would there be at least one real point if we divide? */
+ (nsize[i] > 2*nghostzones[i]+nprocs[i]) &&
+ /* do we have enough processors left to divide in this direction? */
+ (used_procs/nprocs[i] <= free_procs))
+ {
+ max_length = nsize[i] / nprocs[i];
+ max_dir = i;
+ }
+ }
+ /* if no such direction is found: fall back giving a warning */
+ if (max_dir == -1)
+ {
+ for (i = 0; i < dim; i++)
+ {
+ nprocs[i] = 0; /* reset the entries set above before handing over to the old method */
+ }
+ CCTK_WARN(CCTK_WARN_COMPLAIN,
+ "Falling back to the old PUGH topology method, overwriting "
+ "eventually set manual topologies");
+ return PUGH_GenerateTopology(dim, total_procs, nprocs);
+ }
+ /* count the new direction and go on */
+ /* note: this is guaranteed to decrement at least by one, since the number
+ of used processors is naturally always >= the number of processors used
+ for one dimension */
+ free_procs -= used_procs/nprocs[max_dir];
+ nprocs[max_dir]++;
+ }
+ /* success */
+ return 0;
+}
+
+
+
/*@@
@routine PUGH_GenerateNeighbours
@date Mon Nov 8 08:15:08 1999
diff --git a/src/include/pughi.h b/src/include/pughi.h
index c2c45a7..84cf6ba 100644
--- a/src/include/pughi.h
+++ b/src/include/pughi.h
@@ -18,6 +18,8 @@ extern "C"
pConnectivity *PUGH_SetupConnectivity(int dim,
int total_procs,
+ const int *nsize,
+ const int *nghostzones,
int *nprocs,
int *perme);
@@ -49,6 +51,8 @@ void PUGH_DestroyPGExtras(pGExtras **PGExtras);
void PUGH_DestroyPGH(pGH **pughGH);
int PUGH_GenerateTopology(int dim, int total_procs, int *nprocs);
+int PUGH_GenerateAlternativeTopology(int dim, int total_procs, const int *nsize,
+ const int *nghostzones, int *nprocs);
int PUGH_GenerateNeighbours(int dim,
int total_procs,