From d7234d3d38fd6ce3cf7254eef426d28020237ff9 Mon Sep 17 00:00:00 2001
From: tradke
Date: Thu, 18 Sep 2003 13:55:10 +0000
Subject: Somewhat better explanation of how to partition a grid manually.

git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGH/trunk@418 b61c5cb5-eaca-4651-9a7a-d64986f99364
---
 doc/documentation.tex | 172 ++++++++++++++++++++++++++------------------------
 1 file changed, 90 insertions(+), 82 deletions(-)

diff --git a/doc/documentation.tex b/doc/documentation.tex
index 0e10271..4f9abac 100644
--- a/doc/documentation.tex
+++ b/doc/documentation.tex
@@ -1,7 +1,7 @@
\documentclass{article}

% Use the Cactus ThornGuide style file
-% (Automatically used from Cactus distribution, if you have a
+% (Automatically used from Cactus distribution, if you have a
% thorn without the Cactus Flesh download this from the Cactus
% homepage at www.cactuscode.org)
\usepackage{../../../../doc/latex/cactus}
@@ -9,7 +9,7 @@
\begin{document}

\title{PUGH}
-\author{Gabrielle Allen}
+\author{The Cactus Team\\{\tt cactusmaint@cactuscode.org}}
\date{$ $Date$ $}

\maketitle
@@ -29,7 +29,7 @@ in 1, 2 or 3-dimensions.
\section{Compilation}

PUGH can be compiled with or without MPI. Compiling without MPI results
-in an executable which can only be used on a single processor, compiling
+in an executable which can only be used on a single processor; compiling
with MPI leads to an executable which can be used with either single or
multiple processors.
(Section~\ref{pugh_understanding} describes how you can tell if your
@@ -40,37 +40,39 @@ For configuring with MPI, see the Cactus User's Guide.
\section{Grid Size}

The number of grid points used for a simulation can be set in PUGH either
-globally (that is, the total number of points across all processors), or
-locally (that is, the number of points on each processor).
+globally (that is, the total number of points across all processors), or
+locally (that is, the number of points on each processor).

-To set the global size of a 2D grid to be $40\times 40$ use
+To set the global size of an N-D grid to be 40 grid points in each direction, use

-{\tt
\begin{verbatim}
-driver::global_nx = 40
-driver::global_ny = 40
+  PUGH::global_nsize = 40
+\end{verbatim}
+
+To set the global size of a 2D grid to be $40\times 20$, use
+
+\begin{verbatim}
+  PUGH::global_nx = 40
+  PUGH::global_ny = 20
\end{verbatim}
-}

To set the local size of a 2D grid to be $40\times 20$ on each processor, use

-{\tt
\begin{verbatim}
-pugh::local_nx = 40
-pugh::local_ny = 20
+  PUGH::local_nx = 40
+  PUGH::local_ny = 20
\end{verbatim}
-}

\section{Periodic Boundary Conditions}

-PUGH can implement periodic boundary conditions during the synchronization
-of grid functions. Although this may at first seem a little confusing, and
-unlike the usual use of boundary conditions which are directly called from
+PUGH can implement periodic boundary conditions during the synchronization
+of grid functions. Although this may at first seem a little confusing, because
+boundary conditions are usually applied directly from
evolution routines, it is the most efficient and natural place for periodic
boundary conditions.

-PUGH applies periodic conditions by simply communicating the appropriate
+PUGH applies periodic conditions by simply communicating the appropriate
ghostzones between ``end'' processors. For example, for a 1D domain with two
ghostzones, split across two processors, Figure~\ref{pugh::fig1} shows the
implementation of periodic boundary conditions.
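To make this concrete, a minimal parameter-file sketch for the situation shown
in Figure~\ref{pugh::fig1} (a domain with two ghostzones and periodic
boundaries) might look as follows. Here {\tt PUGH::ghost\_size} is assumed to
be the parameter that sets the ghostzone width; it is not discussed further in
this document. {\tt PUGH::periodic} itself is described below.

\begin{verbatim}
  # two ghostzones in each direction (parameter name assumed)
  PUGH::ghost_size = 2

  # exchange ghostzones across the outer faces as well, i.e.
  # apply periodic boundary conditions during synchronisation
  PUGH::periodic = "yes"
\end{verbatim}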
@@ -78,8 +80,8 @@ ghostzones, split across two processors, Figure~\ref{pugh::fig1} shows the imple
\begin{center}
\includegraphics[angle=0,width=8cm]{periodic}
\end{center}
-\caption[]{Implementation of periodic boundary conditions for a 1D domain, with two ghostzones, split across two processors. The lines labelled {\bf A} show the {\it standard} communications during synchronisation, the lines labelled
-{\bf B} show the additional communications for periodic boundary conditions.}
+\caption[]{Implementation of periodic boundary conditions for a 1D domain, with two ghostzones, split across two processors. The lines labelled {\bf A} show the {\it standard} communications during synchronisation, while the lines labelled
+{\bf B} show the additional communications for periodic boundary conditions.}
\label{pugh::fig1}
\end{figure}

@@ -89,21 +91,17 @@ to switch them off in given directions.
By default, no periodic boundary conditions are applied.

To apply periodic boundary conditions in all directions, set

-{\tt
\begin{verbatim}
-driver::periodic = "yes"
+  PUGH::periodic = "yes"
\end{verbatim}
-}

-To apply periodic boundary conditions in just the x- and y- directions in
+To apply periodic boundary conditions in just the x- and y-directions in
a 3-dimensional domain, use

-{\tt
\begin{verbatim}
-driver::periodic = "yes"
-driver::periodic_z = "no"
+  PUGH::periodic = "yes"
+  PUGH::periodic_z = "no"
\end{verbatim}
-}

\section{Processor Decomposition}

@@ -113,57 +111,67 @@ By default PUGH will distribute the computational grid evenly across all
processors (as in Figure~\ref{pugh::fig2}a). This may not be
efficient if there is a different computational load on different
processors, or for example for a simulation distributed across
-different processor speeds.
+processors with different per-processor performance.

\begin{figure}[ht]
\begin{center}
\includegraphics[angle=0,width=12cm]{Partitioning}
\end{center}
\caption[]{Partitioning of the computational grid across processors: Figure~a) is the default type of partition used by {\tt PUGH}, Figure~b) can be set
-manually, and Figure~c) is not possible with {\tt PUGH}}
+manually, and Figure~c) is not possible with {\tt PUGH}.}
\label{pugh::fig2}
\end{figure}

-The computational grid can be manually partitioned in a regularly way
-as in Figure~\ref{pugh::fig2}b. (Note that the type of partitioning
-shown in Figure~\ref{pugh::fig2}c is not possible with {\tt PUGH}).
+The computational grid can be manually partitioned in each direction
+in a regular way, as in Figure~\ref{pugh::fig2}b.

-The computational grid can be manually distributed using the
-parameters\\ {\tt
-partition[\_1d\_x|\_2d\_x|\_2d\_y|\_3d\_x|\_3d\_y|\_3d\_z]}. To manually
-specify the load distribution, set {\tt pugh::partition = "manual"}
+The computational grid can be manually distributed using PUGH's
+string parameters \verb!partition_[1d_x|2d_x|2d_y|3d_x|3d_y|3d_z]!.
+To manually specify the load distribution, set {\tt PUGH::partition = "manual"}
and then, depending on the grid dimension, set the remaining parameters
to distribute the load in each direction. Note that for this you need to
know the processor decomposition a priori: either fix it by hand with the
{\tt PUGH::processor\_topology} parameters, as in the example below, or read
it off from PUGH's startup output described in Section~\ref{pugh_understanding}.

-The decomposition is easiest to explain with an example,
-to distribute a grid with $30 \times 30$ points across
-4 processors (decomposed as $2 \times 2$) as:
+The decomposition is easiest to explain with a simple example:
+to distribute a $30 \times 30 \times 30$ grid across 4 processors (decomposed as $2 \times 1
+\times 2$, with processors 0 and 2 performing twice as fast as processors 1
+and 3) as:

\begin{tabular}{cc}
-$20\times 15$ & $10 \times 15$ \\
-$20\times 15$ & $10 \times 15$
+proc 2: $20 \times 30 \times 15$ & proc 3: $10 \times 30 \times 15$ \\
+proc 0: $20 \times 30 \times 15$ & proc 1: $10 \times 30 \times 15$ \\
\end{tabular}

-use the parameters
+you would use the following topology and partition parameter settings:

-{\tt
\begin{verbatim}
-pugh::partition="manual"
-pugh::partition_2d_x="20:10"
-pugh::partition_2d_y="15:15"
+  # the overall grid size
+  PUGH::global_nsize = 30
+
+  # processor topology
+  PUGH::processor_topology = "manual"
+  PUGH::processor_topology_3d_x = 2
+  PUGH::processor_topology_3d_y = 1
+  PUGH::processor_topology_3d_z = 2   # redundant: follows from the other two and the number of processors
+
+  # grid partitioning
+  PUGH::partition = "manual"
+  PUGH::partition_3d_x = "20 10"
\end{verbatim}
-}

-Note that an empty string for a direction will apply the automatic
-distribution.
+Each partition parameter lists the number of grid points for every processor
+in that direction, with the numbers delimited by any non-digit characters.
+Note that an empty string for a direction (which is the default value for
+the partition parameters) will apply the automatic distribution. That is why
+it is not necessary to set \verb|PUGH::partition_3d_y = "30"| or
+\verb|PUGH::partition_3d_z = "15 15"| in the parameter file.

\section{Understanding PUGH Output}
\label{pugh_understanding}

-PUGH reports information about the processor decomposition to standard output
+PUGH reports information about the processor decomposition to standard output
at the start of a job. This section describes how to interpret that output.

\vskip .3cm

@@ -196,14 +204,14 @@ If an executable has been compiled for only single processor use
\item{\bf Type of evolution}

-If an executable has been compiled using MPI, the first thing which
+If an executable has been compiled using MPI, the first thing which
PUGH reports is this fact, together with the number of processors being used:

{\tt INFO (PUGH): MPI Evolution on 3 processors}

-\item{\bf Maximum load skew}
+\item{\bf Maximum load skew}

The maximum load skew describes the variance in the number of gridpoints on
each processor, and is defined by

@@ -222,47 +230,47 @@ be overridden by performing the load balancing manually.
\section{Useful Parameters}

-There are several parameters in PUGH which are useful for debugging and
+There are several parameters in PUGH which are useful for debugging and
optimisation:

\begin{Lentry}

-\item[{\tt pugh::enable\_all\_storage}]
+\item[{\tt PUGH::enable\_all\_storage}]

-  Enables storage for all grid variables (that is, not only
-  those set in a thorn's {\tt schedule.ccl} file). Try this parameter
-  if you are getting segmentation faults. If enabling all storage
-  removes the problem, it most likely means that you are accessing
-  a grid variable (probably in a Fortran thorn) for which storage
-  has not been set.
+Enables storage for all grid variables (that is, not only
+those set in a thorn's {\tt schedule.ccl} file). Try this parameter
+if you are getting segmentation faults. If enabling all storage
+removes the problem, it most likely means that you are accessing
+a grid variable (probably in a Fortran thorn) for which storage
+has not been set.

-\item[{\tt pugh::initialise\_memory}]
+\item[{\tt PUGH::initialise\_memory}]

-  By default, when PUGH allocates storage for a grid variable it
-  does not initialise its elements. If you access an
-  uninitialised variable on some platforms you will get a
-  segmentation fault (and in general you will see erratic
-  behaviour). This parameter can be used to initialise all
-  elements to zero, if this removes your segmentation fault you
-  can then track down the cause of the problem by using the same
-  parameter to initialize all elements to NaNs and then track
-  them with the thorn {\tt CactusUtils/NaNChecker}.
+By default, when PUGH allocates storage for a grid variable it
+does not initialise its elements. If you access an
+uninitialised variable, on some platforms you will get a
+segmentation fault (and in general you will see erratic
+behaviour). This parameter can be used to initialise all
+elements to zero; if this removes your segmentation fault, you
+can then track down the cause of the problem by using the same
+parameter to initialise all elements to NaNs and then track
+them with the thorn {\tt CactusUtils/NaNChecker}.

-  Note that it isn't recommended to simply use this parameter to
-  initialise all elements to zero, instead we recommend you to
-  set all variables to their correct values before using them.
+Note that it is not recommended to simply use this parameter to
+initialise all elements to zero; instead, we recommend that you
+set all variables to their correct values before using them.

-\item[{\tt pugh::storage\_verbose}]
+\item[{\tt PUGH::storage\_verbose}]

-  This parameter can be set to print out the number of grid variables
-  which have storage allocated at each iteration, and the total
-  size of the storage allocated by Cactus. Note that this total
-  does not include storage allocated independently in thorns.
+This parameter can be set to print out the number of grid variables
+which have storage allocated at each iteration, and the total
+size of the storage allocated by Cactus. Note that this total
+does not include storage allocated independently in thorns.

-\item[{\tt pugh::timer\_output}]
+\item[{\tt PUGH::timer\_output}]

-  This parameter can be set to provide the time spent communicating
-  variables between processors.
+This parameter can be set to provide the time spent communicating
+variables between processors.

\end{Lentry}
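In practice these parameters are often combined when tracking down a memory
problem. The following parameter-file fragment is only a sketch: the parameter
names are those listed above, but the exact values they accept (for example
{\tt "NaN"} for {\tt PUGH::initialise\_memory}, or {\tt "yes"} for the two
reporting parameters) are assumptions based on the descriptions in this
section and should be checked against the thorn's {\tt param.ccl}.

\begin{verbatim}
  # give every grid variable storage, to rule out missing storage
  PUGH::enable_all_storage = "yes"

  # initialise newly allocated memory to NaNs so that reads of
  # uninitialised data can be caught (value assumed, see text above)
  PUGH::initialise_memory = "NaN"

  # report allocated storage and communication times
  PUGH::storage_verbose = "yes"
  PUGH::timer_output    = "yes"
\end{verbatim}

Together with {\tt CactusUtils/NaNChecker}, mentioned above, this makes it
much easier to locate the first access to an unset grid variable.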