From d7234d3d38fd6ce3cf7254eef426d28020237ff9 Mon Sep 17 00:00:00 2001
From: tradke
Date: Thu, 18 Sep 2003 13:55:10 +0000
Subject: Somewhat better explanation of how to partition a grid manually.

git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGH/trunk@418 b61c5cb5-eaca-4651-9a7a-d64986f99364
---
 doc/documentation.tex | 172 ++++++++++++++++++++++++++------------------------
 1 file changed, 90 insertions(+), 82 deletions(-)

diff --git a/doc/documentation.tex b/doc/documentation.tex
index 0e10271..4f9abac 100644
--- a/doc/documentation.tex
+++ b/doc/documentation.tex
@@ -1,7 +1,7 @@
\documentclass{article}

% Use the Cactus ThornGuide style file
-% (Automatically used from Cactus distribution, if you have a
+% (Automatically used from Cactus distribution, if you have a
% thorn without the Cactus Flesh download this from the Cactus
% homepage at www.cactuscode.org)
\usepackage{../../../../doc/latex/cactus}
@@ -9,7 +9,7 @@
\begin{document}

\title{PUGH}
-\author{Gabrielle Allen}
+\author{The Cactus Team\\{\tt cactusmaint@cactuscode.org}}
\date{$ $Date$ $}

\maketitle
@@ -29,7 +29,7 @@ in 1, 2 or 3-dimensions.
\section{Compilation}

PUGH can be compiled with or without MPI. Compiling without MPI results
-in an executable which can only be used on a single processor, compiling
+in an executable which can only be used on a single processor; compiling
with MPI leads to an executable which can be used with either single or
multiple processors.
(Section~\ref{pugh_understanding} describes how you can tell if your
@@ -40,37 +40,39 @@ For configuring with MPI, see the Cactus User's Guide.
\section{Grid Size}

The number of grid points used for a simulation can be set in PUGH either
-globally (that is, the total number of points across all processors), or
-locally (that is, the number of points on each processor).
+globally (that is, the total number of points across all processors), or
+locally (that is, the number of points on each processor).

-To set the global size of a 2D grid to be $40\times 40$ use
+To set the global size of an N-D grid to be 40 grid points in each direction, use

-{\tt
\begin{verbatim}
-driver::global_nx = 40
-driver::global_ny = 40
+  PUGH::global_nsize = 40
+\end{verbatim}
+
+To set the global size of a 2D grid to be $40\times 20$, use
+
+\begin{verbatim}
+  PUGH::global_nx = 40
+  PUGH::global_ny = 20
\end{verbatim}
-}

To set the local size of a 2D grid to be $40\times 20$ on each processor, use

-{\tt
\begin{verbatim}
-pugh::local_nx = 40
-pugh::local_ny = 20
+  PUGH::local_nx = 40
+  PUGH::local_ny = 20
\end{verbatim}
-}

\section{Periodic Boundary Conditions}

-PUGH can implement periodic boundary conditions during the synchronization
-of grid functions. Although this may at first seem a little confusing, and
-unlike the usual use of boundary conditions which are directly called from
+PUGH can implement periodic boundary conditions during the synchronization
+of grid functions. Although this may at first seem a little confusing, because
+boundary conditions are usually applied directly from
evolution routines, it is the most efficient and natural place for periodic
boundary conditions.

-PUGH applies periodic conditions by simply communicating the appropriate
+PUGH applies periodic conditions by simply communicating the appropriate
ghostzones between ``end'' processors. For example, for a 1D domain with two
ghostzones, split across two processors, Figure~\ref{pugh::fig1} shows the
implementation of periodic boundary conditions.
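To make this concrete, a minimal parameter-file sketch for the situation shown
in Figure~\ref{pugh::fig1} (a domain with two ghostzones and periodic
boundaries) might look as follows. Here {\tt PUGH::ghost\_size} is assumed to
be the parameter that sets the ghostzone width; it is not discussed further in
this document. {\tt PUGH::periodic} itself is described below.

\begin{verbatim}
  # two ghostzones in each direction (parameter name assumed)
  PUGH::ghost_size = 2

  # exchange ghostzones across the outer faces as well, i.e.
  # apply periodic boundary conditions during synchronisation
  PUGH::periodic = "yes"
\end{verbatim}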
@@ -78,8 +80,8 @@ ghostzones, split across two processors, Figure~\ref{pugh::fig1} shows the imple
\begin{center}
\includegraphics[angle=0,width=8cm]{periodic}
\end{center}
-\caption[]{Implementation of periodic boundary conditions for a 1D domain, with two ghostzones, split across two processors. The lines labelled {\bf A} show the {\it standard} communications during synchronisation, the lines labelled
-{\bf B} show the additional communications for periodic boundary conditions.}
+\caption[]{Implementation of periodic boundary conditions for a 1D domain, with two ghostzones, split across two processors. The lines labelled {\bf A} show the {\it standard} communications during synchronisation, while the lines labelled
+{\bf B} show the additional communications for periodic boundary conditions.}
\label{pugh::fig1}
\end{figure}

@@ -89,21 +91,17 @@ to switch them off in given directions.
By default, no periodic boundary conditions are applied.

To apply periodic boundary conditions in all directions, set

-{\tt
\begin{verbatim}
-driver::periodic = "yes"
+  PUGH::periodic = "yes"
\end{verbatim}
-}

-To apply periodic boundary conditions in just the x- and y- directions in
+To apply periodic boundary conditions in just the x- and y-directions in
a 3-dimensional domain, use

-{\tt
\begin{verbatim}
-driver::periodic = "yes"
-driver::periodic_z = "no"
+  PUGH::periodic = "yes"
+  PUGH::periodic_z = "no"
\end{verbatim}
-}

\section{Processor Decomposition}

@@ -113,57 +111,67 @@ By default PUGH will distribute the computational grid evenly across all
processors (as in Figure~\ref{pugh::fig2}a). This may not be
efficient if there is a different computational load on different
processors, or for example for a simulation distributed across
-different processor speeds.
+processors with different per-processor performance.

\begin{figure}[ht]
\begin{center}
\includegraphics[angle=0,width=12cm]{Partitioning}
\end{center}
\caption[]{Partitioning of the computational grid across processors: Figure~a) is the default type of partition used by {\tt PUGH}, Figure~b) can be set
-manually, and Figure~c) is not possible with {\tt PUGH}}
+manually, and Figure~c) is not possible with {\tt PUGH}.}
\label{pugh::fig2}
\end{figure}

-The computational grid can be manually partitioned in a regularly way
-as in Figure~\ref{pugh::fig2}b. (Note that the type of partitioning
-shown in Figure~\ref{pugh::fig2}c is not possible with {\tt PUGH}).
+The computational grid can be manually partitioned in each direction
+in a regular way, as in Figure~\ref{pugh::fig2}b.

-The computational grid can be manually distributed using the
-parameters\\ {\tt
-partition[\_1d\_x|\_2d\_x|\_2d\_y|\_3d\_x|\_3d\_y|\_3d\_z]}. To manually
-specify the load distribution, set {\tt pugh::partition = "manual"}
+The computational grid can be manually distributed using PUGH's
+string parameters \verb!partition_[1d_x|2d_x|2d_y|3d_x|3d_y|3d_z]!.
+To manually specify the load distribution, set {\tt PUGH::partition = "manual"}
and then, depending on the grid dimension, set the remaining parameters
to distribute the load in each direction. Note that for this you need to
know the processor decomposition a priori: either fix it by hand with the
{\tt PUGH::processor\_topology} parameters, as in the example below, or read
it off from PUGH's startup output described in Section~\ref{pugh_understanding}.

-The decomposition is easiest to explain with an example,
-to distribute a grid with $30 \times 30$ points across
-4 processors (decomposed as $2 \times 2$) as:
+The decomposition is easiest to explain with a simple example:
+to distribute a $30 \times 30 \times 30$ grid across 4 processors (decomposed as $2 \times 1
+\times 2$, with processors 0 and 2 performing twice as fast as processors 1
+and 3) as:

\begin{tabular}{cc}
-$20\times 15$ & $10 \times 15$ \\
-$20\times 15$ & $10 \times 15$
+proc 2: $20 \times 30 \times 15$ & proc 3: $10 \times 30 \times 15$ \\
+proc 0: $20 \times 30 \times 15$ & proc 1: $10 \times 30 \times 15$ \\
\end{tabular}

-use the parameters
+you would use the following topology and partition parameter settings:

-{\tt
\begin{verbatim}
-pugh::partition="manual"
-pugh::partition_2d_x="20:10"
-pugh::partition_2d_y="15:15"
+  # the overall grid size
+  PUGH::global_nsize = 30
+
+  # processor topology
+  PUGH::processor_topology = "manual"
+  PUGH::processor_topology_3d_x = 2
+  PUGH::processor_topology_3d_y = 1
+  PUGH::processor_topology_3d_z = 2   # redundant: follows from the other two and the number of processors
+
+  # grid partitioning
+  PUGH::partition = "manual"
+  PUGH::partition_3d_x = "20 10"
\end{verbatim}
-}

-Note that an empty string for a direction will apply the automatic
-distribution.
+Each partition parameter lists the number of grid points for every processor
+in that direction, with the numbers delimited by any non-digit characters.
+Note that an empty string for a direction (which is the default value for
+the partition parameters) will apply the automatic distribution. That is why
+it is not necessary to set \verb|PUGH::partition_3d_y = "30"| or
+\verb|PUGH::partition_3d_z = "15 15"| in the parameter file.

\section{Understanding PUGH Output}
\label{pugh_understanding}

-PUGH reports information about the processor decomposition to standard output
+PUGH reports information about the processor decomposition to standard output
at the start of a job. This section describes how to interpret that output.

\vskip .3cm

@@ -196,14 +204,14 @@ If an executable has been compiled for only single processor use
\item{\bf Type of evolution}

-If an executable has been compiled using MPI, the first thing which
+If an executable has been compiled using MPI, the first thing which
PUGH reports is this fact, together with the number of processors being used:

{\tt INFO (PUGH): MPI Evolution on 3 processors}

-\item{\bf Maximum load skew}
+\item{\bf Maximum load skew}

The maximum load skew describes the variance in the number of gridpoints on
each processor, and is defined by

@@ -222,47 +230,47 @@ be overridden by performing the load balancing manually.
\section{Useful Parameters}

-There are several parameters in PUGH which are useful for debugging and
+There are several parameters in PUGH which are useful for debugging and
optimisation:

\begin{Lentry}

-\item[{\tt pugh::enable\_all\_storage}]
+\item[{\tt PUGH::enable\_all\_storage}]

-  Enables storage for all grid variables (that is, not only
-  those set in a thorn's {\tt schedule.ccl} file). Try this parameter
-  if you are getting segmentation faults. If enabling all storage
-  removes the problem, it most likely means that you are accessing
-  a grid variable (probably in a Fortran thorn) for which storage
-  has not been set.
+Enables storage for all grid variables (that is, not only
+those set in a thorn's {\tt schedule.ccl} file). Try this parameter
+if you are getting segmentation faults. If enabling all storage
+removes the problem, it most likely means that you are accessing
+a grid variable (probably in a Fortran thorn) for which storage
+has not been set.

-\item[{\tt pugh::initialise\_memory}]
+\item[{\tt PUGH::initialise\_memory}]

-  By default, when PUGH allocates storage for a grid variable it
-  does not initialise its elements. If you access an
-  uninitialised variable on some platforms you will get a
-  segmentation fault (and in general you will see erratic
-  behaviour). This parameter can be used to initialise all
-  elements to zero, if this removes your segmentation fault you
-  can then track down the cause of the problem by using the same
-  parameter to initialize all elements to NaNs and then track
-  them with the thorn {\tt CactusUtils/NaNChecker}.
+By default, when PUGH allocates storage for a grid variable it
+does not initialise its elements. If you access an
+uninitialised variable, on some platforms you will get a
+segmentation fault (and in general you will see erratic
+behaviour). This parameter can be used to initialise all
+elements to zero; if this removes your segmentation fault, you
+can then track down the cause of the problem by using the same
+parameter to initialise all elements to NaNs and then track
+them with the thorn {\tt CactusUtils/NaNChecker}.

-  Note that it isn't recommended to simply use this parameter to
-  initialise all elements to zero, instead we recommend you to
-  set all variables to their correct values before using them.
+Note that it is not recommended to simply use this parameter to
+initialise all elements to zero; instead, we recommend that you
+set all variables to their correct values before using them.

-\item[{\tt pugh::storage\_verbose}]
+\item[{\tt PUGH::storage\_verbose}]

-  This parameter can be set to print out the number of grid variables
-  which have storage allocated at each iteration, and the total
-  size of the storage allocated by Cactus. Note that this total
-  does not include storage allocated independently in thorns.
+This parameter can be set to print out the number of grid variables
+which have storage allocated at each iteration, and the total
+size of the storage allocated by Cactus. Note that this total
+does not include storage allocated independently in thorns.

-\item[{\tt pugh::timer\_output}]
+\item[{\tt PUGH::timer\_output}]

-  This parameter can be set to provide the time spent communicating
-  variables between processors.
+This parameter can be set to provide the time spent communicating
+variables between processors.

\end{Lentry}
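In practice these parameters are often combined when tracking down a memory
problem. The following parameter-file fragment is only a sketch: the parameter
names are those listed above, but the exact values they accept (for example
{\tt "NaN"} for {\tt PUGH::initialise\_memory}, or {\tt "yes"} for the two
reporting parameters) are assumptions based on the descriptions in this
section and should be checked against the thorn's {\tt param.ccl}.

\begin{verbatim}
  # give every grid variable storage, to rule out missing storage
  PUGH::enable_all_storage = "yes"

  # initialise newly allocated memory to NaNs so that reads of
  # uninitialised data can be caught (value assumed, see text above)
  PUGH::initialise_memory = "NaN"

  # report allocated storage and communication times
  PUGH::storage_verbose = "yes"
  PUGH::timer_output    = "yes"
\end{verbatim}

Together with {\tt CactusUtils/NaNChecker}, mentioned above, this makes it
much easier to locate the first access to an unset grid variable.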