aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authortradke <tradke@1c20744c-e24a-42ec-9533-f5004cb800e5>2002-12-20 12:38:53 +0000
committertradke <tradke@1c20744c-e24a-42ec-9533-f5004cb800e5>2002-12-20 12:38:53 +0000
commitf85a8454dd5dad7e6537e06f1969a933512c5802 (patch)
tree47d0018161384ad854ab68bb46f7e403fc6ff431 /src
parent66f121529563e2a05fb30e9f804d6974c48829b8 (diff)
Initial version of PUGHInterp_InterpGridArrays() which implements the new
global interpolation API and overloads CCTK_InterpGridArrays(). No table options are evaluated yet. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHInterp/trunk@36 1c20744c-e24a-42ec-9533-f5004cb800e5
Diffstat (limited to 'src')
-rw-r--r--src/InterpGridArrays.c942
-rw-r--r--src/make.code.defn2
-rw-r--r--src/pughInterpGH.h16
3 files changed, 959 insertions, 1 deletions
diff --git a/src/InterpGridArrays.c b/src/InterpGridArrays.c
new file mode 100644
index 0000000..60eca3f
--- /dev/null
+++ b/src/InterpGridArrays.c
@@ -0,0 +1,942 @@
+ /*@@
+ @file InterpGridArrays.c
+ @date Tue 10 Dec 2002
+ @author Thomas Radke
+ @desc
+ Implementation of PUGHInterp's global interpolator routine
+ which overloads CCTK_InterpGridArrays()
+ @enddesc
+
+ @history
+ @date Tue 10 Dec 2002
+ @author Thomas Radke
+ @hdesc source code copied from Operator.c which implements the old
+ API for global interpolation
+ @endhistory
+ @version $Id$
+ @@*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "util_ErrorCodes.h"
+#include "util_Table.h"
+#include "cctk.h"
+#include "CactusPUGH/PUGH/src/include/pugh.h"
+#include "pughInterpGH.h"
+
+/* the rcs ID and its dummy function to use it */
+static const char *rcsid = "$Header$";
+CCTK_FILEVERSION(CactusPUGH_PUGHInterp_InterpGridArrays_c)
+
+
+/********************************************************************
+ ******************** Macro Definitions ************************
+ ********************************************************************/
+/* uncomment this to get some debugging output */
+/* #define PUGHINTERP_DEBUG 1 */
+
+/* fudge factor for mapping points onto processors */
+#define FUDGE 0.0
+
+/* macro do sort interpolation results from a single communication buffer
+ into their appropriate output arrays */
+#define SORT_TYPED_ARRAY(cctk_type) \
+ { \
+ int _i; \
+ cctk_type *_src, *_dst; \
+ \
+ \
+ _src = (cctk_type *) this->buf; \
+ _dst = (cctk_type *) output_arrays[array]; \
+ for (_i = 0; _i < myGH->N_points_from[proc]; _i++) \
+ { \
+ _dst[myGH->indices[_i + offset]] = *_src++; \
+ } \
+ this->buf = (char *) _src; \
+ }
+
+
+
+/********************************************************************
+ *********************** Type Definitions **********************
+ ********************************************************************/
+#ifdef CCTK_MPI
+/* internal structure describing a handle for a single CCTK data type */
+typedef struct
+{
+ int vtypesize; /* variable type's size in bytes */
+ MPI_Datatype mpitype; /* corresponding MPI datatype */
+ int N_arrays; /* number of in/out arrays */
+ void *sendbuf; /* communication send buffer for this type */
+ void *recvbuf; /* communication receive buffer for this type */
+ char *buf; /* work pointer for sendbuf */
+} type_desc_t;
+#endif
+
+
+/********************************************************************
+ ******************** Internal Routines ************************
+ ********************************************************************/
+static int CreateParameterTable (int global_param_table_handle,
+ int local_param_table_handle);
+
+
+/*@@
+ @routine PUGHInterp_InterpGridArrays
+ @date Mon 16 Dec 2002
+ @author Thomas Radke
+ @desc
+ PUGHInterp's interpolation routine for distributed grid arrays.
+ This routine overloads CCTK_InterpGridArrays().
+ @enddesc
+
+ @var GH
+ @vdesc pointer to CCTK grid hierarchy
+ @vtype const cGH *
+ @vio in
+ @endvar
+ @var N_dims
+ @vdesc number of dimensions for the interpolation
+ @vtype int
+ @vio in
+ @endvar
+ @var global_param_table_handle
+ @vdesc parameter table handle for passing optional parameters to the
+ global interpolator routine
+ @vtype int
+ @vio in
+ @endvar
+ @var local_param_table_handle
+ @vdesc parameter table handle for passing optional parameters to the
+ local interpolator routine
+ @vtype int
+ @vio in
+ @endvar
+ @var coord_system_handle
+ @vdesc handle for the underlying coordinate system
+ @vtype int
+ @vio in
+ @endvar
+ @var N_points
+ @vdesc number of points to interpolate at
+ @vtype int
+ @vio in
+ @endvar
+ @var interp_coords_type
+ @vdesc CCTK datatype of the coordinate arrays as passed via
+ <interp_coords> (common datatype for all arrays)
+ @vtype int
+ @vio in
+ @endvar
+ @var interp_coords
+ @vdesc list of <N_dims> arrays with coordinate for <N_points>
+ points to interpolate at
+ @vtype const void *const []
+ @vio in
+ @endvar
+ @var N_input_arrays
+ @vdesc number of input arrays
+ @vtype int
+ @vio in
+ @endvar
+ @var input_array_indices
+ @vdesc list of <N_input_arrays> grid variables (given by their indices)
+ to interpolate
+ @vtype const CCTK_INT []
+ @vio in
+ @endvar
+ @var N_output_arrays
+ @vdesc number of output arrays
+ @vtype int
+ @vio in
+ @endvar
+ @var out_array_types
+ @vdesc list of <N_output_arrays> requested CCTK datatypes for the
+ output arrays
+ @vtype const CCTK_INT []
+ @vio in
+ @endvar
+ @var output_arrays
+ @vdesc list of <N_output_arrays> output arrays (given by their pointers)
+ which receive the interpolation results
+ @vtype void *const []
+ @vio out
+ @endvar
+
+ @returntype int
+ @returndesc
+ 0 - successful interpolation
+ -1 - in case of any errors
+ @endreturndesc
+@@*/
+int PUGHInterp_InterpGridArrays (const cGH *GH,
+ int N_dims,
+ int global_param_table_handle,
+ int local_param_table_handle,
+ int local_interp_handle,
+ int coord_system_handle,
+ int N_points,
+ int interp_coords_type,
+ const void *const interp_coords[],
+ int N_input_arrays,
+ const CCTK_INT input_array_indices[],
+ int N_output_arrays,
+ const CCTK_INT output_array_types[],
+ void *const output_arrays[])
+{
+ int i, param_table_handle, retval;
+ CCTK_REAL *origin_local, *delta_local;
+ CCTK_INT *input_array_dims, *input_array_types;
+ CCTK_REAL **interp_coords_local;
+ const void **input_arrays;
+ const char *coord_system_name;
+ const pGH *pughGH;
+ const pGExtras *extras;
+ cGroupDynamicData group_data;
+#ifdef CCTK_MPI
+ pughInterpGH *myGH;
+ int N_points_local, N_types;
+ int j, point, proc, array, type, offset;
+ char *msg;
+ char **output_arrays_local;
+ type_desc_t *this, *type_desc;
+ CCTK_REAL *range_min, *range_max;
+ CCTK_REAL *origin_global, *delta_global;
+ CCTK_REAL *interp_coords_proc, *coords;
+#endif
+
+
+ /* get GH extension handles for PUGHInterp and PUGH */
+ myGH = CCTK_GHExtension (GH, "PUGHInterp");
+ pughGH = CCTK_GHExtension (GH, "PUGH");
+
+ /* check function arguments */
+ if (GH == NULL)
+ {
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+ if (N_dims <= 0)
+ {
+ CCTK_WARN (1, "N_dims argument must have a positive value");
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+ if (N_points < 0 || N_input_arrays < 0 || N_output_arrays < 0)
+ {
+ CCTK_WARN (1, "N_points, N_input_arrays, and N_output_arrays arguments "
+ "must have a non-negative value");
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+
+ /* right now we don't allow query calls only to the local interpolator
+ so N_points must be positive and the set of input/output arrays
+ must be non-empty */
+ if (pughGH->nprocs == 1 && N_points == 0)
+ {
+ return (0); /* no error */
+ }
+ if (N_input_arrays == 0 || N_output_arrays == 0)
+ {
+ CCTK_WARN (1, "number of input/output arrays must be non-zero");
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+ if ((N_points > 0 && interp_coords == NULL) || input_array_indices == NULL ||
+ output_array_types == NULL || output_arrays == NULL)
+ {
+ CCTK_WARN (1, "input/output array pointer arguments must be non-NULL");
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+
+ if (interp_coords_type != CCTK_VARIABLE_REAL)
+ {
+ CCTK_WARN (1, "interpolation coordinates must be of datatype CCTK_REAL");
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+
+ coord_system_name = CCTK_CoordSystemName (coord_system_handle);
+ if (coord_system_name == NULL)
+ {
+ return (UTIL_ERROR_BAD_HANDLE);
+ }
+ if (CCTK_CoordSystemDim (coord_system_name) < N_dims)
+ {
+ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "coordinate system '%s' has less dimensions than interpolation "
+ "coordinates (dim = %d)", coord_system_name, N_dims);
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+
+ /* create the parameter table for the local interpolator from the
+ user-supplied global and local parameter tables */
+ param_table_handle = CreateParameterTable (global_param_table_handle,
+ local_param_table_handle);
+ if (param_table_handle < 0)
+ {
+ return (param_table_handle);
+ }
+
+
+ /*************************************************************************/
+
+
+ /* allocate some temporary arrays */
+ origin_local = malloc (2 * N_dims * sizeof (CCTK_REAL));
+ delta_local = origin_local + N_dims;
+ input_arrays = malloc (N_input_arrays * sizeof (void *));
+ input_array_dims = malloc ((N_dims + N_input_arrays) * sizeof (CCTK_INT));
+ input_array_types = input_array_dims + N_dims;
+
+ /* get the extras pointer of the first coordinate
+ This is used later on to verify the layout of the input arrays as well
+ as for mapping points to processors. */
+ i = CCTK_CoordIndex (1, NULL, coord_system_name);
+ extras = ((const pGA *) pughGH->variables[i][0])->extras;
+
+ /* get the origin and delta of the processor-local grid,
+ copy the integer dimension array into an CCTK_INT array */
+ for (i = 0; i < N_dims; i++)
+ {
+ CCTK_CoordLocalRange (GH, &origin_local[i], &delta_local[i], i + 1,
+ NULL, coord_system_name);
+ delta_local[i] = (delta_local[i] - origin_local[i]) / extras->lnsize[i];
+ input_array_dims[i] = extras->lnsize[i];
+ }
+
+ /* check that the input arrays dimensions match the coordinate system
+ (but their dimensionality can be less) */
+ for (i = retval = 0; i < N_input_arrays; i++)
+ {
+ if (CCTK_GroupDynamicData (GH,
+ CCTK_GroupIndexFromVarI(input_array_indices[i]),
+ &group_data) < 0)
+ {
+ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Invalid input array index %d",
+ input_array_indices[i]);
+ retval = UTIL_ERROR_BAD_INPUT;
+ continue;
+ }
+
+ if (group_data.dim > N_dims)
+ {
+ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Input array variable with index %d has more dimensions "
+ "than coordinate system '%s'",
+ input_array_indices[i], coord_system_name);
+ retval = UTIL_ERROR_BAD_INPUT;
+ continue;
+ }
+
+ if (memcmp (group_data.lsh, extras->lnsize, group_data.dim * sizeof (int)))
+ {
+ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Dimensions of input array variable with index %d "
+ "doesn't match with coordinate system '%s'",
+ input_array_indices[i], coord_system_name);
+ retval = UTIL_ERROR_BAD_INPUT;
+ }
+
+ /* get the data pointer to the input array (use current timelevel)
+ and datatype */
+ input_arrays[i] = CCTK_VarDataPtrI (GH, 0, input_array_indices[i]);
+ input_array_types[i] = CCTK_VarTypeI (input_array_indices[i]);
+ }
+
+ /* single-processor case directly calls the local interpolator */
+ if (retval >= 0 && pughGH->nprocs == 1)
+ {
+ retval = CCTK_InterpLocalUniform (N_dims, local_interp_handle,
+ local_param_table_handle,
+ origin_local, delta_local, N_points,
+ interp_coords_type, interp_coords,
+ N_input_arrays, input_array_dims,
+ input_array_types, input_arrays,
+ N_output_arrays, output_array_types,
+ output_arrays);
+ }
+ if (retval < 0 || pughGH->nprocs == 1)
+ {
+ free (origin_local);
+ free (input_arrays);
+ free (input_array_dims);
+ Util_TableDestroy (param_table_handle);
+
+ return (retval);
+ }
+
+#ifdef CCTK_MPI
+ /*** Here follows the multi-processor case:
+ All processors locate their points to interpolate at
+ and exchange the coordinates so that every processor gets
+ those points which it can process locally.
+ After interpolation the results have to be sent back to the
+ requesting processors.
+ For both communications MPI_Alltoallv() is used.
+
+ In order to minimize the total number of MPI_Alltoallv() calls
+ (which are quite expensive) we collect the interpolation results
+ for all output arrays of the same CCTK data type into a single
+ communication buffer. That means, after communication the data
+ needs to be resorted from the buffer into the output arrays.
+ ***/
+
+ /* first of all, set up a structure with information of the
+ CCTK data types we have to deal with */
+
+ /* get the maximum value of the output array CCTK data types
+ NOTE: we assume that CCTK data types are defined as consecutive
+ positive constants starting from zero */
+ for (array = N_types = 0; array < N_output_arrays; array++)
+ {
+ if (N_types < output_array_types[array])
+ {
+ N_types = output_array_types[array];
+ }
+ }
+
+ /* now allocate an array of structures to describe all possible types */
+ type_desc = calloc (N_types + 1, sizeof (type_desc_t));
+
+ /* count the number of arrays of same type
+ (the N_arrays element was already initialized to zero by calloc() */
+ for (array = 0; array < N_output_arrays; array++)
+ {
+ type_desc[output_array_types[array]].N_arrays++;
+ }
+
+ /* fill in the type description information */
+ for (type = retval = 0, this = type_desc; type <= N_types; type++, this++)
+ {
+ if (this->N_arrays > 0)
+ {
+ /* get the variable type size in bytes */
+ this->vtypesize = CCTK_VarTypeSize (type);
+ if (this->vtypesize <= 0)
+ {
+ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "Invalid variable type %d passed, arrays of such type will "
+ "be skipped during interpolation", type);
+ this->N_arrays = 0;
+ continue;
+ }
+
+ /* get the MPI data type to use for communicating such a CCTK data type */
+ this->mpitype = PUGH_MPIDataType (pughGH, type);
+ if (this->mpitype == MPI_DATATYPE_NULL)
+ {
+ CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING,
+ "No MPI data type defined for variable type %d, arrays of "
+ "such type will be skipped during interpolation", type);
+ this->N_arrays = 0;
+ continue;
+ }
+
+ retval++;
+ }
+ }
+
+ /* check that there's at least one array with a valid CCTK data type */
+ if (retval <= 0)
+ {
+ free (origin_local);
+ free (input_arrays);
+ free (input_array_dims);
+ free (type_desc);
+ Util_TableDestroy (param_table_handle);
+
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+
+ /* map the requested points to interpolate at onto the processors
+ they belong to and gather the coordinates of all the points local to
+ this processor
+
+ the number of processor-local points is returned in N_points_local,
+ their coordinates in interp_coords_local */
+
+ /* this holds the proccessor for *each* of N_points points */
+ myGH->whichproc = NULL;
+ if (N_points > 0)
+ {
+ myGH->whichproc = malloc (2 * N_points * sizeof (int));
+ }
+ /* indices[] is used to make the sorting easier
+ when receiving the output data */
+ myGH->indices = myGH->whichproc + N_points;
+
+ /* initialize whichproc with invalid processor number -1 */
+ for (point = 0; point < N_points; point++)
+ {
+ myGH->whichproc[point] = -1;
+ }
+
+ /* initialize N_points_from to 0 for counting it up in the following loop */
+ memset (myGH->N_points_from, 0, pughGH->nprocs * sizeof (CCTK_INT));
+
+ /* allocate the ranges for my local coordinates */
+ range_min = malloc (4 * N_dims * sizeof (CCTK_REAL));
+ range_max = range_min + 1*N_dims;
+ origin_global = range_min + 2*N_dims;
+ delta_global = range_min + 3*N_dims;
+
+ /* get the global origin and delta of the coordinate system */
+ for (i = 0; i < N_dims; i++)
+ {
+ CCTK_CoordRange (GH, &origin_global[i], &delta_global[i], i+1, NULL,
+ coord_system_name);
+ delta_global[i] = (delta_global[i]-origin_global[i]) / (extras->nsize[i]-1);
+ }
+
+ /* locate the points to interpolate at */
+ for (proc = 0; proc < pughGH->nprocs; proc++)
+ {
+ for (i = 0; i < N_dims; i++)
+ {
+ /* compute the coordinate ranges */
+ /* TODO: use bbox instead -- but the bboxes of other processors
+ are not known */
+ int const has_lower = extras->lb[proc][i] == 0;
+ int const has_upper = extras->ub[proc][i] == GH->cctk_gsh[i]-1;
+ range_min[i] = origin_global[i] + (extras->lb[proc][i] + (!has_lower) * (extras->nghostzones[i]-0.5) - FUDGE)*delta_global[i];
+ range_max[i] = origin_global[i] + (extras->ub[proc][i] - (!has_upper) * (extras->nghostzones[i]-0.5) + FUDGE)*delta_global[i];
+ }
+
+ /* and now which point will be processed by what processor */
+ for (point = 0; point < N_points; point++)
+ {
+ /* skip points which already have been located */
+ if (myGH->whichproc[point] >= 0)
+ {
+ continue;
+ }
+
+ /* check if the point belongs to this processor
+ (must be within min/max in all dimensions) */
+ for (i = j = 0; i < N_dims; i++)
+ {
+ if (((const CCTK_REAL *) interp_coords[i])[point] >= range_min[i] &&
+ ((const CCTK_REAL *) interp_coords[i])[point] <= range_max[i])
+ {
+ j++;
+ }
+ }
+ if (j == N_dims)
+ {
+ myGH->whichproc[point] = proc;
+ myGH->N_points_from[proc]++;
+ }
+ }
+ }
+ /* don't need this anymore */
+ free (range_min);
+
+ /* make sure that all points could be mapped onto a processor */
+ for (point = j = 0; point < N_points; point++)
+ {
+ if (myGH->whichproc[point] < 0)
+ {
+ msg = malloc (80 + N_dims*20);
+ sprintf (msg, "Unable to locate point %d [%f",
+ point, (double) ((const CCTK_REAL *) interp_coords[0])[point]);
+ for (i = 1; i < N_dims; i++)
+ {
+ sprintf (msg, "%s %f",
+ msg, (double) ((const CCTK_REAL *) interp_coords[i])[point]);
+ }
+ sprintf (msg, "%s]", msg);
+ CCTK_WARN (1, msg);
+ free (msg);
+ j = 1; /* mark as error */
+ }
+ }
+ if (j)
+ {
+ if (myGH->whichproc)
+ {
+ free (myGH->whichproc);
+ myGH->whichproc = NULL;
+ }
+ free (origin_local);
+ free (input_arrays);
+ free (input_array_dims);
+ free (type_desc);
+ Util_TableDestroy (param_table_handle);
+
+ return (UTIL_ERROR_BAD_INPUT);
+ }
+
+ /* Now we want to resolve the N_points_from[]. Currently this is
+ the form of ( in 2 proc mode )
+ P1: Num from P1 NFP2
+ P2: NFP1 NFP2
+
+ and this needs to become
+ P1: Num to P1 NTP2
+ P2: NTP1 NTP1
+
+ Since NTP1 = NFP2 (and this works in more proc mode too)
+ this is an all-to-all communication.
+ */
+ CACTUS_MPI_ERROR (MPI_Alltoall (myGH->N_points_from, 1, PUGH_MPI_INT,
+ myGH->N_points_to, 1, PUGH_MPI_INT,
+ pughGH->PUGH_COMM_WORLD));
+
+#ifdef PUGHINTERP_DEBUG
+ for (proc = 0; proc < pughGH->nprocs; proc++)
+ {
+ printf ("processor %d <-> %d From: %d To: %d\n",
+ CCTK_MyProc (GH), proc, myGH->N_points_from[proc],
+ myGH->N_points_to[proc]);
+ }
+#endif
+
+ /* Great. Now we know how many to expect from each processor,
+ and how many to send to each processor. So first we have
+ to send the locations to the processors which hold our data.
+ This means I send interp_coords[i][point] to whichproc[point].
+ I have N_points_from[proc] to send to each processor.
+ */
+
+ /* This is backwards in the broadcast location; the number of points
+ we are getting is how many everyone else is sending to us,
+ eg, N_points_to, not how many we get back from everyone else,
+ eg, N_points_from. The number we are sending, of course, is
+ all of our locations, eg, N_points */
+ for (proc = N_points_local = 0; proc < pughGH->nprocs; proc++)
+ {
+ N_points_local += myGH->N_points_to[proc];
+ }
+
+#ifdef PUGHINTERP_DEBUG
+ printf ("processor %d gets %d points in total\n",
+ CCTK_MyProc (GH), N_points_local);
+#endif
+
+ /* allocate the local coordinates array (sorted in processor order) */
+ interp_coords_proc = NULL;
+ if (N_points > 0)
+ {
+ interp_coords_proc = malloc (N_dims * N_points * sizeof (CCTK_REAL));
+ }
+
+ /* now sort my own coordinates as tupels of [N_dims] */
+ for (proc = j = 0; proc < pughGH->nprocs; proc++)
+ {
+ for (point = 0; point < N_points; point++)
+ {
+ if (myGH->whichproc[point] == proc)
+ {
+ for (i = 0; i < N_dims; i++)
+ {
+ interp_coords_proc[N_dims*j + i] =
+ ((const CCTK_REAL *) interp_coords[i])[point];
+ }
+ myGH->indices[j++] = point;
+ }
+ }
+ }
+
+ /* So load up the send and recv stuff */
+ /* Send N_dims elements per data point */
+ myGH->sendcnt[0] = N_dims * myGH->N_points_from[0];
+ myGH->recvcnt[0] = N_dims * myGH->N_points_to[0];
+ myGH->senddispl[0] = myGH->recvdispl[0] = 0;
+ for (proc = 1; proc < pughGH->nprocs; proc++)
+ {
+ myGH->sendcnt[proc] = N_dims * myGH->N_points_from[proc];
+ myGH->recvcnt[proc] = N_dims * myGH->N_points_to[proc];
+ myGH->senddispl[proc] = myGH->senddispl[proc-1] + myGH->sendcnt[proc-1];
+ myGH->recvdispl[proc] = myGH->recvdispl[proc-1] + myGH->recvcnt[proc-1];
+ }
+
+ /* Great, and now exchange the coordinates and collect the ones
+ that I have to process in *interp_coords_local[] */
+ coords = NULL;
+ if (N_points_local > 0)
+ {
+ coords = malloc (N_dims * N_points_local * sizeof (CCTK_REAL));
+ }
+ CACTUS_MPI_ERROR (MPI_Alltoallv (interp_coords_proc, myGH->sendcnt,
+ myGH->senddispl, PUGH_MPI_REAL,
+ coords, myGH->recvcnt,
+ myGH->recvdispl, PUGH_MPI_REAL,
+ pughGH->PUGH_COMM_WORLD));
+
+ /* don't need this anymore */
+ if (interp_coords_proc)
+ {
+ free (interp_coords_proc);
+ }
+
+ /* finally, sort the local coordinates array (which is flat one-dimensional)
+ into the interp_coords[N_dim][N_points_local] array */
+ interp_coords_local = malloc (N_dims * sizeof (CCTK_REAL *));
+ for (i = 0; i < N_dims; i++)
+ {
+ interp_coords_local[i] = NULL;
+ if (N_points_local > 0)
+ {
+ interp_coords_local[i] = malloc (N_points_local * sizeof (CCTK_REAL));
+ for (point = 0; point < N_points_local; point++)
+ {
+ interp_coords_local[i][point] = coords[point*N_dims + i];
+ }
+ }
+ }
+
+ if (coords)
+ {
+ free (coords);
+ }
+
+ /* allocate intermediate output arrays for local interpolation */
+ output_arrays_local = calloc (N_output_arrays, sizeof (void *));
+ if (N_points_local > 0)
+ {
+ for (array = 0; array < N_output_arrays; array++)
+ {
+ this = type_desc + output_array_types[array];
+ output_arrays_local[array] = malloc (N_points_local * this->vtypesize);
+ }
+
+ /* now call the local interpolator for all local points and store
+ the results in the intermediate local output arrays */
+ retval = CCTK_InterpLocalUniform (N_dims, local_interp_handle,
+ local_param_table_handle,
+ origin_local, delta_local, N_points_local,
+ interp_coords_type,
+ (const void **) interp_coords_local,
+ N_input_arrays, input_array_dims,
+ input_array_types, input_arrays,
+ N_output_arrays, output_array_types,
+ (void **) output_arrays_local);
+
+ /* don't need these anymore */
+ for (i = 0; i < N_dims; i++)
+ {
+ free (interp_coords_local[i]);
+ }
+ }
+
+ /* clean up some intermediate arrays */
+ free (interp_coords_local);
+ free (origin_local);
+ free (input_arrays);
+ free (input_array_dims);
+
+ /* now send the interpolation results to the processors which requested them,
+ and also receive my own results that were computed remotely.
+ Before we can do the communication in one go (for each datatype, of course)
+ we have to sort the results from the intermediate output arrays, which the
+ local interpolator wanted, into a single contiguous communication buffer.*/
+ for (type = 0, this = type_desc; type <= N_types; type++, this++)
+ {
+ /* skip unused types */
+ if (this->N_arrays <= 0)
+ {
+ continue;
+ }
+
+ /* set up the communication (this is type-independent) */
+ myGH->sendcnt[0] = this->N_arrays * myGH->N_points_to[0];
+ myGH->recvcnt[0] = this->N_arrays * myGH->N_points_from[0];
+ myGH->senddispl[0] = myGH->recvdispl[0] = 0;
+ for (proc = 1; proc < pughGH->nprocs; proc++)
+ {
+ myGH->sendcnt[proc] = this->N_arrays * myGH->N_points_to[proc];
+ myGH->recvcnt[proc] = this->N_arrays * myGH->N_points_from[proc];
+ myGH->senddispl[proc] = myGH->senddispl[proc-1] + myGH->sendcnt[proc-1];
+ myGH->recvdispl[proc] = myGH->recvdispl[proc-1] + myGH->recvcnt[proc-1];
+ }
+
+ /* Allocate contiguous communication buffer for each datatype into which
+ the local interpolation results from all input arrays of that datatype
+ will be written to.
+ If there are no points to send/receive by this processor
+ set the buffer pointer to an invalid but non-NULL value
+ otherwise we might get trouble with NULL pointers in MPI_Alltoallv () */
+
+ /* dereferencing such an address should code crash on most systems */
+ this->sendbuf = this->recvbuf = (void *) this->vtypesize;
+
+ /* here goes the allocation for the send buffer, along with copying the
+ results from the intermediate local output arrays */
+ if (N_points_local > 0)
+ {
+ this->buf = malloc (this->N_arrays * N_points_local * this->vtypesize);
+ this->sendbuf = this->buf;
+
+ for (proc = offset = 0; proc < pughGH->nprocs; proc++)
+ {
+ for (array = 0; array < N_output_arrays; array++)
+ {
+ if (output_array_types[array] != type)
+ {
+ continue;
+ }
+
+ memcpy (this->buf, output_arrays_local[array] + offset,
+ myGH->N_points_to[proc] * this->vtypesize);
+ this->buf += myGH->N_points_to[proc] * this->vtypesize;
+ }
+ offset += myGH->N_points_to[proc] * this->vtypesize;
+ }
+ }
+
+ /* receive buffer is easy */
+ if (N_points > 0)
+ {
+ this->recvbuf = malloc (this->N_arrays * N_points*this->vtypesize);
+ }
+
+ /* now do the global exchange for this datatype */
+ CACTUS_MPI_ERROR (MPI_Alltoallv (this->sendbuf, myGH->sendcnt,
+ myGH->senddispl, this->mpitype,
+ this->recvbuf, myGH->recvcnt,
+ myGH->recvdispl, this->mpitype,
+ pughGH->PUGH_COMM_WORLD));
+
+ /* now that the data is sent we don't need the send buffer anymore */
+ if (N_points_local > 0)
+ {
+ free (this->sendbuf);
+ }
+
+ /* no sort neccessary if there are no points */
+ if (N_points <= 0)
+ {
+ continue;
+ }
+
+ /* go back from processor-sorted data to input-ordered data.
+ The creation of the indices array above makes this not so bad. */
+ this->buf = this->recvbuf;
+ for (proc = offset = 0; proc < pughGH->nprocs; proc++)
+ {
+ if (myGH->N_points_from[proc] <= 0)
+ {
+ continue;
+ }
+
+ for (array = 0; array < N_output_arrays; array++)
+ {
+ if (output_array_types[array] != type)
+ {
+ continue;
+ }
+
+ /* now do the sorting according to the CCTK data type */
+ switch (output_array_types[array])
+ {
+ case CCTK_VARIABLE_CHAR: SORT_TYPED_ARRAY (CCTK_BYTE); break;
+ case CCTK_VARIABLE_INT: SORT_TYPED_ARRAY (CCTK_INT); break;
+ case CCTK_VARIABLE_REAL: SORT_TYPED_ARRAY (CCTK_REAL); break;
+ case CCTK_VARIABLE_COMPLEX: SORT_TYPED_ARRAY (CCTK_COMPLEX); break;
+#ifdef CCTK_REAL4
+ case CCTK_VARIABLE_REAL4: SORT_TYPED_ARRAY (CCTK_REAL4); break;
+ case CCTK_VARIABLE_COMPLEX8: SORT_TYPED_ARRAY (CCTK_COMPLEX8); break;
+#endif
+#ifdef CCTK_REAL8
+ case CCTK_VARIABLE_REAL8: SORT_TYPED_ARRAY (CCTK_REAL8); break;
+ case CCTK_VARIABLE_COMPLEX16: SORT_TYPED_ARRAY (CCTK_COMPLEX16);break;
+#endif
+#ifdef CCTK_REAL16
+ case CCTK_VARIABLE_REAL16: SORT_TYPED_ARRAY (CCTK_REAL16); break;
+ case CCTK_VARIABLE_COMPLEX32: SORT_TYPED_ARRAY (CCTK_COMPLEX32);break;
+#endif
+ default: CCTK_WARN (0, "Implementation error");
+ }
+
+ } /* end of loop over all output arrays */
+
+ /* advance the offset into the communication receive buffer */
+ offset += myGH->N_points_from[proc];
+
+ } /* end of loop over all processors */
+
+ /* this communication receive buffer isn't needed anymore */
+ free (this->recvbuf);
+
+ } /* end of loop over all types */
+
+ /* free intermediate output arrays */
+ for (array = 0; array < N_output_arrays; array++)
+ {
+ if (output_arrays_local[array])
+ {
+ free (output_arrays_local[array]);
+ }
+ }
+ free (output_arrays_local);
+
+ /* free remaining resources allocated within this run */
+ if (myGH->whichproc)
+ {
+ free (myGH->whichproc);
+ myGH->whichproc = NULL;
+ }
+ free (type_desc);
+#endif /* MPI */
+
+ Util_TableDestroy (param_table_handle);
+
+ return (retval);
+}
+
+
+/********************************************************************
+ ******************** Internal Routines ************************
+ ********************************************************************/
+ /*@@
+ @routine CreateParameterTable
+ @date Fri 20 Dec 2002
+ @author Thomas Radke
+ @desc
+ Parses the options given in the user-supplied global parameter
+ table and creates a parameter table to be passed to the local
+ interpolator.
+ The table is cloned from the user-supplied local parameter table
+ - or created as an empty table if there is no local parameter
+ table - and then completed with options from the global
+ interpolator.
+ @enddesc
+
+ @var global_param_table_handle
+ @vdesc handle to the user-supplied global parameter table
+ @vtype int
+ @vio in
+ @endvar
+ @var local_param_table_handle
+ @vdesc handle to the user-supplied local parameter table
+ @vtype int
+ @vio in
+ @endvar
+
+ @returntype int
+ @returndesc
+ handle to the new local parameter table, or<BR>
+ one of the UTIL_ERROR_TABLE_* error codes
+ @endreturndesc
+@@*/
+static int CreateParameterTable (int global_param_table_handle,
+ int local_param_table_handle)
+{
+ int retval;
+
+
+ retval = local_param_table_handle >= 0 ?
+ Util_TableClone (local_param_table_handle) : Util_TableCreate (0);
+ if (retval < 0)
+ {
+ CCTK_WARN (1, "couldn't create/clone local interpolator parameter table");
+ }
+
+ /* FIXME: must evaluate options from global parameter table */
+ if (global_param_table_handle >= 0)
+ {
+ CCTK_WARN (1, "options in global interpolator parameter table are ignored "
+ "so far");
+ }
+
+ return (retval);
+}
diff --git a/src/make.code.defn b/src/make.code.defn
index b8d41c1..eba2a31 100644
--- a/src/make.code.defn
+++ b/src/make.code.defn
@@ -2,4 +2,4 @@
# $Header$
# Source files in this directory
-SRCS = Startup.c Operator.c Interpolate.c
+SRCS = Startup.c Operator.c Interpolate.c InterpGridArrays.c
diff --git a/src/pughInterpGH.h b/src/pughInterpGH.h
index 543d1d5..2abd51b 100644
--- a/src/pughInterpGH.h
+++ b/src/pughInterpGH.h
@@ -32,6 +32,22 @@ typedef struct
} pughInterpGH; /* point-sorted order */
+/* prototype of PUGHInterp's routine which overloads CCTK_InterpGridArrays() */
+int PUGHInterp_InterpGridArrays (const cGH *GH,
+ int N_dims,
+ int global_param_table_handle,
+ int local_param_table_handle,
+ int local_interp_handle,
+ int coord_system_handle,
+ int N_interp_points,
+ int interp_coords_type,
+ const void *const interp_coords[],
+ int N_input_arrays,
+ const CCTK_INT input_array_indices[],
+ int N_output_arrays,
+ const CCTK_INT output_array_types[],
+ void *const output_arrays[]);
+
/* prototypes of interpolation operators to be registered */
int PUGHInterp_InterpGV (cGH *GH,
int order,