From f85a8454dd5dad7e6537e06f1969a933512c5802 Mon Sep 17 00:00:00 2001 From: tradke Date: Fri, 20 Dec 2002 12:38:53 +0000 Subject: Initial version of PUGHInterp_InterpGridArrays() which implements the new global interpolation API and overloads CCTK_InterpGridArrays(). No table options are evaluated yet. git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHInterp/trunk@36 1c20744c-e24a-42ec-9533-f5004cb800e5 --- src/InterpGridArrays.c | 942 +++++++++++++++++++++++++++++++++++++++++++++++++ src/make.code.defn | 2 +- src/pughInterpGH.h | 16 + 3 files changed, 959 insertions(+), 1 deletion(-) create mode 100644 src/InterpGridArrays.c (limited to 'src') diff --git a/src/InterpGridArrays.c b/src/InterpGridArrays.c new file mode 100644 index 0000000..60eca3f --- /dev/null +++ b/src/InterpGridArrays.c @@ -0,0 +1,942 @@ + /*@@ + @file InterpGridArrays.c + @date Tue 10 Dec 2002 + @author Thomas Radke + @desc + Implementation of PUGHInterp's global interpolator routine + which overloads CCTK_InterpGridArrays() + @enddesc + + @history + @date Tue 10 Dec 2002 + @author Thomas Radke + @hdesc source code copied from Operator.c which implements the old + API for global interpolation + @endhistory + @version $Id$ + @@*/ + +#include +#include + +#include "util_ErrorCodes.h" +#include "util_Table.h" +#include "cctk.h" +#include "CactusPUGH/PUGH/src/include/pugh.h" +#include "pughInterpGH.h" + +/* the rcs ID and its dummy function to use it */ +static const char *rcsid = "$Header$"; +CCTK_FILEVERSION(CactusPUGH_PUGHInterp_InterpGridArrays_c) + + +/******************************************************************** + ******************** Macro Definitions ************************ + ********************************************************************/ +/* uncomment this to get some debugging output */ +/* #define PUGHINTERP_DEBUG 1 */ + +/* fudge factor for mapping points onto processors */ +#define FUDGE 0.0 + +/* macro do sort interpolation results from a single communication buffer + into their appropriate output arrays */ +#define SORT_TYPED_ARRAY(cctk_type) \ + { \ + int _i; \ + cctk_type *_src, *_dst; \ + \ + \ + _src = (cctk_type *) this->buf; \ + _dst = (cctk_type *) output_arrays[array]; \ + for (_i = 0; _i < myGH->N_points_from[proc]; _i++) \ + { \ + _dst[myGH->indices[_i + offset]] = *_src++; \ + } \ + this->buf = (char *) _src; \ + } + + + +/******************************************************************** + *********************** Type Definitions ********************** + ********************************************************************/ +#ifdef CCTK_MPI +/* internal structure describing a handle for a single CCTK data type */ +typedef struct +{ + int vtypesize; /* variable type's size in bytes */ + MPI_Datatype mpitype; /* corresponding MPI datatype */ + int N_arrays; /* number of in/out arrays */ + void *sendbuf; /* communication send buffer for this type */ + void *recvbuf; /* communication receive buffer for this type */ + char *buf; /* work pointer for sendbuf */ +} type_desc_t; +#endif + + +/******************************************************************** + ******************** Internal Routines ************************ + ********************************************************************/ +static int CreateParameterTable (int global_param_table_handle, + int local_param_table_handle); + + +/*@@ + @routine PUGHInterp_InterpGridArrays + @date Mon 16 Dec 2002 + @author Thomas Radke + @desc + PUGHInterp's interpolation routine for distributed grid arrays. + This routine overloads CCTK_InterpGridArrays(). + @enddesc + + @var GH + @vdesc pointer to CCTK grid hierarchy + @vtype const cGH * + @vio in + @endvar + @var N_dims + @vdesc number of dimensions for the interpolation + @vtype int + @vio in + @endvar + @var global_param_table_handle + @vdesc parameter table handle for passing optional parameters to the + global interpolator routine + @vtype int + @vio in + @endvar + @var local_param_table_handle + @vdesc parameter table handle for passing optional parameters to the + local interpolator routine + @vtype int + @vio in + @endvar + @var coord_system_handle + @vdesc handle for the underlying coordinate system + @vtype int + @vio in + @endvar + @var N_points + @vdesc number of points to interpolate at + @vtype int + @vio in + @endvar + @var interp_coords_type + @vdesc CCTK datatype of the coordinate arrays as passed via + (common datatype for all arrays) + @vtype int + @vio in + @endvar + @var interp_coords + @vdesc list of arrays with coordinate for + points to interpolate at + @vtype const void *const [] + @vio in + @endvar + @var N_input_arrays + @vdesc number of input arrays + @vtype int + @vio in + @endvar + @var input_array_indices + @vdesc list of grid variables (given by their indices) + to interpolate + @vtype const CCTK_INT [] + @vio in + @endvar + @var N_output_arrays + @vdesc number of output arrays + @vtype int + @vio in + @endvar + @var out_array_types + @vdesc list of requested CCTK datatypes for the + output arrays + @vtype const CCTK_INT [] + @vio in + @endvar + @var output_arrays + @vdesc list of output arrays (given by their pointers) + which receive the interpolation results + @vtype void *const [] + @vio out + @endvar + + @returntype int + @returndesc + 0 - successful interpolation + -1 - in case of any errors + @endreturndesc +@@*/ +int PUGHInterp_InterpGridArrays (const cGH *GH, + int N_dims, + int global_param_table_handle, + int local_param_table_handle, + int local_interp_handle, + int coord_system_handle, + int N_points, + int interp_coords_type, + const void *const interp_coords[], + int N_input_arrays, + const CCTK_INT input_array_indices[], + int N_output_arrays, + const CCTK_INT output_array_types[], + void *const output_arrays[]) +{ + int i, param_table_handle, retval; + CCTK_REAL *origin_local, *delta_local; + CCTK_INT *input_array_dims, *input_array_types; + CCTK_REAL **interp_coords_local; + const void **input_arrays; + const char *coord_system_name; + const pGH *pughGH; + const pGExtras *extras; + cGroupDynamicData group_data; +#ifdef CCTK_MPI + pughInterpGH *myGH; + int N_points_local, N_types; + int j, point, proc, array, type, offset; + char *msg; + char **output_arrays_local; + type_desc_t *this, *type_desc; + CCTK_REAL *range_min, *range_max; + CCTK_REAL *origin_global, *delta_global; + CCTK_REAL *interp_coords_proc, *coords; +#endif + + + /* get GH extension handles for PUGHInterp and PUGH */ + myGH = CCTK_GHExtension (GH, "PUGHInterp"); + pughGH = CCTK_GHExtension (GH, "PUGH"); + + /* check function arguments */ + if (GH == NULL) + { + return (UTIL_ERROR_BAD_INPUT); + } + if (N_dims <= 0) + { + CCTK_WARN (1, "N_dims argument must have a positive value"); + return (UTIL_ERROR_BAD_INPUT); + } + if (N_points < 0 || N_input_arrays < 0 || N_output_arrays < 0) + { + CCTK_WARN (1, "N_points, N_input_arrays, and N_output_arrays arguments " + "must have a non-negative value"); + return (UTIL_ERROR_BAD_INPUT); + } + + /* right now we don't allow query calls only to the local interpolator + so N_points must be positive and the set of input/output arrays + must be non-empty */ + if (pughGH->nprocs == 1 && N_points == 0) + { + return (0); /* no error */ + } + if (N_input_arrays == 0 || N_output_arrays == 0) + { + CCTK_WARN (1, "number of input/output arrays must be non-zero"); + return (UTIL_ERROR_BAD_INPUT); + } + if ((N_points > 0 && interp_coords == NULL) || input_array_indices == NULL || + output_array_types == NULL || output_arrays == NULL) + { + CCTK_WARN (1, "input/output array pointer arguments must be non-NULL"); + return (UTIL_ERROR_BAD_INPUT); + } + + if (interp_coords_type != CCTK_VARIABLE_REAL) + { + CCTK_WARN (1, "interpolation coordinates must be of datatype CCTK_REAL"); + return (UTIL_ERROR_BAD_INPUT); + } + + coord_system_name = CCTK_CoordSystemName (coord_system_handle); + if (coord_system_name == NULL) + { + return (UTIL_ERROR_BAD_HANDLE); + } + if (CCTK_CoordSystemDim (coord_system_name) < N_dims) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "coordinate system '%s' has less dimensions than interpolation " + "coordinates (dim = %d)", coord_system_name, N_dims); + return (UTIL_ERROR_BAD_INPUT); + } + + /* create the parameter table for the local interpolator from the + user-supplied global and local parameter tables */ + param_table_handle = CreateParameterTable (global_param_table_handle, + local_param_table_handle); + if (param_table_handle < 0) + { + return (param_table_handle); + } + + + /*************************************************************************/ + + + /* allocate some temporary arrays */ + origin_local = malloc (2 * N_dims * sizeof (CCTK_REAL)); + delta_local = origin_local + N_dims; + input_arrays = malloc (N_input_arrays * sizeof (void *)); + input_array_dims = malloc ((N_dims + N_input_arrays) * sizeof (CCTK_INT)); + input_array_types = input_array_dims + N_dims; + + /* get the extras pointer of the first coordinate + This is used later on to verify the layout of the input arrays as well + as for mapping points to processors. */ + i = CCTK_CoordIndex (1, NULL, coord_system_name); + extras = ((const pGA *) pughGH->variables[i][0])->extras; + + /* get the origin and delta of the processor-local grid, + copy the integer dimension array into an CCTK_INT array */ + for (i = 0; i < N_dims; i++) + { + CCTK_CoordLocalRange (GH, &origin_local[i], &delta_local[i], i + 1, + NULL, coord_system_name); + delta_local[i] = (delta_local[i] - origin_local[i]) / extras->lnsize[i]; + input_array_dims[i] = extras->lnsize[i]; + } + + /* check that the input arrays dimensions match the coordinate system + (but their dimensionality can be less) */ + for (i = retval = 0; i < N_input_arrays; i++) + { + if (CCTK_GroupDynamicData (GH, + CCTK_GroupIndexFromVarI(input_array_indices[i]), + &group_data) < 0) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "Invalid input array index %d", + input_array_indices[i]); + retval = UTIL_ERROR_BAD_INPUT; + continue; + } + + if (group_data.dim > N_dims) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "Input array variable with index %d has more dimensions " + "than coordinate system '%s'", + input_array_indices[i], coord_system_name); + retval = UTIL_ERROR_BAD_INPUT; + continue; + } + + if (memcmp (group_data.lsh, extras->lnsize, group_data.dim * sizeof (int))) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "Dimensions of input array variable with index %d " + "doesn't match with coordinate system '%s'", + input_array_indices[i], coord_system_name); + retval = UTIL_ERROR_BAD_INPUT; + } + + /* get the data pointer to the input array (use current timelevel) + and datatype */ + input_arrays[i] = CCTK_VarDataPtrI (GH, 0, input_array_indices[i]); + input_array_types[i] = CCTK_VarTypeI (input_array_indices[i]); + } + + /* single-processor case directly calls the local interpolator */ + if (retval >= 0 && pughGH->nprocs == 1) + { + retval = CCTK_InterpLocalUniform (N_dims, local_interp_handle, + local_param_table_handle, + origin_local, delta_local, N_points, + interp_coords_type, interp_coords, + N_input_arrays, input_array_dims, + input_array_types, input_arrays, + N_output_arrays, output_array_types, + output_arrays); + } + if (retval < 0 || pughGH->nprocs == 1) + { + free (origin_local); + free (input_arrays); + free (input_array_dims); + Util_TableDestroy (param_table_handle); + + return (retval); + } + +#ifdef CCTK_MPI + /*** Here follows the multi-processor case: + All processors locate their points to interpolate at + and exchange the coordinates so that every processor gets + those points which it can process locally. + After interpolation the results have to be sent back to the + requesting processors. + For both communications MPI_Alltoallv() is used. + + In order to minimize the total number of MPI_Alltoallv() calls + (which are quite expensive) we collect the interpolation results + for all output arrays of the same CCTK data type into a single + communication buffer. That means, after communication the data + needs to be resorted from the buffer into the output arrays. + ***/ + + /* first of all, set up a structure with information of the + CCTK data types we have to deal with */ + + /* get the maximum value of the output array CCTK data types + NOTE: we assume that CCTK data types are defined as consecutive + positive constants starting from zero */ + for (array = N_types = 0; array < N_output_arrays; array++) + { + if (N_types < output_array_types[array]) + { + N_types = output_array_types[array]; + } + } + + /* now allocate an array of structures to describe all possible types */ + type_desc = calloc (N_types + 1, sizeof (type_desc_t)); + + /* count the number of arrays of same type + (the N_arrays element was already initialized to zero by calloc() */ + for (array = 0; array < N_output_arrays; array++) + { + type_desc[output_array_types[array]].N_arrays++; + } + + /* fill in the type description information */ + for (type = retval = 0, this = type_desc; type <= N_types; type++, this++) + { + if (this->N_arrays > 0) + { + /* get the variable type size in bytes */ + this->vtypesize = CCTK_VarTypeSize (type); + if (this->vtypesize <= 0) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "Invalid variable type %d passed, arrays of such type will " + "be skipped during interpolation", type); + this->N_arrays = 0; + continue; + } + + /* get the MPI data type to use for communicating such a CCTK data type */ + this->mpitype = PUGH_MPIDataType (pughGH, type); + if (this->mpitype == MPI_DATATYPE_NULL) + { + CCTK_VWarn (1, __LINE__, __FILE__, CCTK_THORNSTRING, + "No MPI data type defined for variable type %d, arrays of " + "such type will be skipped during interpolation", type); + this->N_arrays = 0; + continue; + } + + retval++; + } + } + + /* check that there's at least one array with a valid CCTK data type */ + if (retval <= 0) + { + free (origin_local); + free (input_arrays); + free (input_array_dims); + free (type_desc); + Util_TableDestroy (param_table_handle); + + return (UTIL_ERROR_BAD_INPUT); + } + + /* map the requested points to interpolate at onto the processors + they belong to and gather the coordinates of all the points local to + this processor + + the number of processor-local points is returned in N_points_local, + their coordinates in interp_coords_local */ + + /* this holds the proccessor for *each* of N_points points */ + myGH->whichproc = NULL; + if (N_points > 0) + { + myGH->whichproc = malloc (2 * N_points * sizeof (int)); + } + /* indices[] is used to make the sorting easier + when receiving the output data */ + myGH->indices = myGH->whichproc + N_points; + + /* initialize whichproc with invalid processor number -1 */ + for (point = 0; point < N_points; point++) + { + myGH->whichproc[point] = -1; + } + + /* initialize N_points_from to 0 for counting it up in the following loop */ + memset (myGH->N_points_from, 0, pughGH->nprocs * sizeof (CCTK_INT)); + + /* allocate the ranges for my local coordinates */ + range_min = malloc (4 * N_dims * sizeof (CCTK_REAL)); + range_max = range_min + 1*N_dims; + origin_global = range_min + 2*N_dims; + delta_global = range_min + 3*N_dims; + + /* get the global origin and delta of the coordinate system */ + for (i = 0; i < N_dims; i++) + { + CCTK_CoordRange (GH, &origin_global[i], &delta_global[i], i+1, NULL, + coord_system_name); + delta_global[i] = (delta_global[i]-origin_global[i]) / (extras->nsize[i]-1); + } + + /* locate the points to interpolate at */ + for (proc = 0; proc < pughGH->nprocs; proc++) + { + for (i = 0; i < N_dims; i++) + { + /* compute the coordinate ranges */ + /* TODO: use bbox instead -- but the bboxes of other processors + are not known */ + int const has_lower = extras->lb[proc][i] == 0; + int const has_upper = extras->ub[proc][i] == GH->cctk_gsh[i]-1; + range_min[i] = origin_global[i] + (extras->lb[proc][i] + (!has_lower) * (extras->nghostzones[i]-0.5) - FUDGE)*delta_global[i]; + range_max[i] = origin_global[i] + (extras->ub[proc][i] - (!has_upper) * (extras->nghostzones[i]-0.5) + FUDGE)*delta_global[i]; + } + + /* and now which point will be processed by what processor */ + for (point = 0; point < N_points; point++) + { + /* skip points which already have been located */ + if (myGH->whichproc[point] >= 0) + { + continue; + } + + /* check if the point belongs to this processor + (must be within min/max in all dimensions) */ + for (i = j = 0; i < N_dims; i++) + { + if (((const CCTK_REAL *) interp_coords[i])[point] >= range_min[i] && + ((const CCTK_REAL *) interp_coords[i])[point] <= range_max[i]) + { + j++; + } + } + if (j == N_dims) + { + myGH->whichproc[point] = proc; + myGH->N_points_from[proc]++; + } + } + } + /* don't need this anymore */ + free (range_min); + + /* make sure that all points could be mapped onto a processor */ + for (point = j = 0; point < N_points; point++) + { + if (myGH->whichproc[point] < 0) + { + msg = malloc (80 + N_dims*20); + sprintf (msg, "Unable to locate point %d [%f", + point, (double) ((const CCTK_REAL *) interp_coords[0])[point]); + for (i = 1; i < N_dims; i++) + { + sprintf (msg, "%s %f", + msg, (double) ((const CCTK_REAL *) interp_coords[i])[point]); + } + sprintf (msg, "%s]", msg); + CCTK_WARN (1, msg); + free (msg); + j = 1; /* mark as error */ + } + } + if (j) + { + if (myGH->whichproc) + { + free (myGH->whichproc); + myGH->whichproc = NULL; + } + free (origin_local); + free (input_arrays); + free (input_array_dims); + free (type_desc); + Util_TableDestroy (param_table_handle); + + return (UTIL_ERROR_BAD_INPUT); + } + + /* Now we want to resolve the N_points_from[]. Currently this is + the form of ( in 2 proc mode ) + P1: Num from P1 NFP2 + P2: NFP1 NFP2 + + and this needs to become + P1: Num to P1 NTP2 + P2: NTP1 NTP1 + + Since NTP1 = NFP2 (and this works in more proc mode too) + this is an all-to-all communication. + */ + CACTUS_MPI_ERROR (MPI_Alltoall (myGH->N_points_from, 1, PUGH_MPI_INT, + myGH->N_points_to, 1, PUGH_MPI_INT, + pughGH->PUGH_COMM_WORLD)); + +#ifdef PUGHINTERP_DEBUG + for (proc = 0; proc < pughGH->nprocs; proc++) + { + printf ("processor %d <-> %d From: %d To: %d\n", + CCTK_MyProc (GH), proc, myGH->N_points_from[proc], + myGH->N_points_to[proc]); + } +#endif + + /* Great. Now we know how many to expect from each processor, + and how many to send to each processor. So first we have + to send the locations to the processors which hold our data. + This means I send interp_coords[i][point] to whichproc[point]. + I have N_points_from[proc] to send to each processor. + */ + + /* This is backwards in the broadcast location; the number of points + we are getting is how many everyone else is sending to us, + eg, N_points_to, not how many we get back from everyone else, + eg, N_points_from. The number we are sending, of course, is + all of our locations, eg, N_points */ + for (proc = N_points_local = 0; proc < pughGH->nprocs; proc++) + { + N_points_local += myGH->N_points_to[proc]; + } + +#ifdef PUGHINTERP_DEBUG + printf ("processor %d gets %d points in total\n", + CCTK_MyProc (GH), N_points_local); +#endif + + /* allocate the local coordinates array (sorted in processor order) */ + interp_coords_proc = NULL; + if (N_points > 0) + { + interp_coords_proc = malloc (N_dims * N_points * sizeof (CCTK_REAL)); + } + + /* now sort my own coordinates as tupels of [N_dims] */ + for (proc = j = 0; proc < pughGH->nprocs; proc++) + { + for (point = 0; point < N_points; point++) + { + if (myGH->whichproc[point] == proc) + { + for (i = 0; i < N_dims; i++) + { + interp_coords_proc[N_dims*j + i] = + ((const CCTK_REAL *) interp_coords[i])[point]; + } + myGH->indices[j++] = point; + } + } + } + + /* So load up the send and recv stuff */ + /* Send N_dims elements per data point */ + myGH->sendcnt[0] = N_dims * myGH->N_points_from[0]; + myGH->recvcnt[0] = N_dims * myGH->N_points_to[0]; + myGH->senddispl[0] = myGH->recvdispl[0] = 0; + for (proc = 1; proc < pughGH->nprocs; proc++) + { + myGH->sendcnt[proc] = N_dims * myGH->N_points_from[proc]; + myGH->recvcnt[proc] = N_dims * myGH->N_points_to[proc]; + myGH->senddispl[proc] = myGH->senddispl[proc-1] + myGH->sendcnt[proc-1]; + myGH->recvdispl[proc] = myGH->recvdispl[proc-1] + myGH->recvcnt[proc-1]; + } + + /* Great, and now exchange the coordinates and collect the ones + that I have to process in *interp_coords_local[] */ + coords = NULL; + if (N_points_local > 0) + { + coords = malloc (N_dims * N_points_local * sizeof (CCTK_REAL)); + } + CACTUS_MPI_ERROR (MPI_Alltoallv (interp_coords_proc, myGH->sendcnt, + myGH->senddispl, PUGH_MPI_REAL, + coords, myGH->recvcnt, + myGH->recvdispl, PUGH_MPI_REAL, + pughGH->PUGH_COMM_WORLD)); + + /* don't need this anymore */ + if (interp_coords_proc) + { + free (interp_coords_proc); + } + + /* finally, sort the local coordinates array (which is flat one-dimensional) + into the interp_coords[N_dim][N_points_local] array */ + interp_coords_local = malloc (N_dims * sizeof (CCTK_REAL *)); + for (i = 0; i < N_dims; i++) + { + interp_coords_local[i] = NULL; + if (N_points_local > 0) + { + interp_coords_local[i] = malloc (N_points_local * sizeof (CCTK_REAL)); + for (point = 0; point < N_points_local; point++) + { + interp_coords_local[i][point] = coords[point*N_dims + i]; + } + } + } + + if (coords) + { + free (coords); + } + + /* allocate intermediate output arrays for local interpolation */ + output_arrays_local = calloc (N_output_arrays, sizeof (void *)); + if (N_points_local > 0) + { + for (array = 0; array < N_output_arrays; array++) + { + this = type_desc + output_array_types[array]; + output_arrays_local[array] = malloc (N_points_local * this->vtypesize); + } + + /* now call the local interpolator for all local points and store + the results in the intermediate local output arrays */ + retval = CCTK_InterpLocalUniform (N_dims, local_interp_handle, + local_param_table_handle, + origin_local, delta_local, N_points_local, + interp_coords_type, + (const void **) interp_coords_local, + N_input_arrays, input_array_dims, + input_array_types, input_arrays, + N_output_arrays, output_array_types, + (void **) output_arrays_local); + + /* don't need these anymore */ + for (i = 0; i < N_dims; i++) + { + free (interp_coords_local[i]); + } + } + + /* clean up some intermediate arrays */ + free (interp_coords_local); + free (origin_local); + free (input_arrays); + free (input_array_dims); + + /* now send the interpolation results to the processors which requested them, + and also receive my own results that were computed remotely. + Before we can do the communication in one go (for each datatype, of course) + we have to sort the results from the intermediate output arrays, which the + local interpolator wanted, into a single contiguous communication buffer.*/ + for (type = 0, this = type_desc; type <= N_types; type++, this++) + { + /* skip unused types */ + if (this->N_arrays <= 0) + { + continue; + } + + /* set up the communication (this is type-independent) */ + myGH->sendcnt[0] = this->N_arrays * myGH->N_points_to[0]; + myGH->recvcnt[0] = this->N_arrays * myGH->N_points_from[0]; + myGH->senddispl[0] = myGH->recvdispl[0] = 0; + for (proc = 1; proc < pughGH->nprocs; proc++) + { + myGH->sendcnt[proc] = this->N_arrays * myGH->N_points_to[proc]; + myGH->recvcnt[proc] = this->N_arrays * myGH->N_points_from[proc]; + myGH->senddispl[proc] = myGH->senddispl[proc-1] + myGH->sendcnt[proc-1]; + myGH->recvdispl[proc] = myGH->recvdispl[proc-1] + myGH->recvcnt[proc-1]; + } + + /* Allocate contiguous communication buffer for each datatype into which + the local interpolation results from all input arrays of that datatype + will be written to. + If there are no points to send/receive by this processor + set the buffer pointer to an invalid but non-NULL value + otherwise we might get trouble with NULL pointers in MPI_Alltoallv () */ + + /* dereferencing such an address should code crash on most systems */ + this->sendbuf = this->recvbuf = (void *) this->vtypesize; + + /* here goes the allocation for the send buffer, along with copying the + results from the intermediate local output arrays */ + if (N_points_local > 0) + { + this->buf = malloc (this->N_arrays * N_points_local * this->vtypesize); + this->sendbuf = this->buf; + + for (proc = offset = 0; proc < pughGH->nprocs; proc++) + { + for (array = 0; array < N_output_arrays; array++) + { + if (output_array_types[array] != type) + { + continue; + } + + memcpy (this->buf, output_arrays_local[array] + offset, + myGH->N_points_to[proc] * this->vtypesize); + this->buf += myGH->N_points_to[proc] * this->vtypesize; + } + offset += myGH->N_points_to[proc] * this->vtypesize; + } + } + + /* receive buffer is easy */ + if (N_points > 0) + { + this->recvbuf = malloc (this->N_arrays * N_points*this->vtypesize); + } + + /* now do the global exchange for this datatype */ + CACTUS_MPI_ERROR (MPI_Alltoallv (this->sendbuf, myGH->sendcnt, + myGH->senddispl, this->mpitype, + this->recvbuf, myGH->recvcnt, + myGH->recvdispl, this->mpitype, + pughGH->PUGH_COMM_WORLD)); + + /* now that the data is sent we don't need the send buffer anymore */ + if (N_points_local > 0) + { + free (this->sendbuf); + } + + /* no sort neccessary if there are no points */ + if (N_points <= 0) + { + continue; + } + + /* go back from processor-sorted data to input-ordered data. + The creation of the indices array above makes this not so bad. */ + this->buf = this->recvbuf; + for (proc = offset = 0; proc < pughGH->nprocs; proc++) + { + if (myGH->N_points_from[proc] <= 0) + { + continue; + } + + for (array = 0; array < N_output_arrays; array++) + { + if (output_array_types[array] != type) + { + continue; + } + + /* now do the sorting according to the CCTK data type */ + switch (output_array_types[array]) + { + case CCTK_VARIABLE_CHAR: SORT_TYPED_ARRAY (CCTK_BYTE); break; + case CCTK_VARIABLE_INT: SORT_TYPED_ARRAY (CCTK_INT); break; + case CCTK_VARIABLE_REAL: SORT_TYPED_ARRAY (CCTK_REAL); break; + case CCTK_VARIABLE_COMPLEX: SORT_TYPED_ARRAY (CCTK_COMPLEX); break; +#ifdef CCTK_REAL4 + case CCTK_VARIABLE_REAL4: SORT_TYPED_ARRAY (CCTK_REAL4); break; + case CCTK_VARIABLE_COMPLEX8: SORT_TYPED_ARRAY (CCTK_COMPLEX8); break; +#endif +#ifdef CCTK_REAL8 + case CCTK_VARIABLE_REAL8: SORT_TYPED_ARRAY (CCTK_REAL8); break; + case CCTK_VARIABLE_COMPLEX16: SORT_TYPED_ARRAY (CCTK_COMPLEX16);break; +#endif +#ifdef CCTK_REAL16 + case CCTK_VARIABLE_REAL16: SORT_TYPED_ARRAY (CCTK_REAL16); break; + case CCTK_VARIABLE_COMPLEX32: SORT_TYPED_ARRAY (CCTK_COMPLEX32);break; +#endif + default: CCTK_WARN (0, "Implementation error"); + } + + } /* end of loop over all output arrays */ + + /* advance the offset into the communication receive buffer */ + offset += myGH->N_points_from[proc]; + + } /* end of loop over all processors */ + + /* this communication receive buffer isn't needed anymore */ + free (this->recvbuf); + + } /* end of loop over all types */ + + /* free intermediate output arrays */ + for (array = 0; array < N_output_arrays; array++) + { + if (output_arrays_local[array]) + { + free (output_arrays_local[array]); + } + } + free (output_arrays_local); + + /* free remaining resources allocated within this run */ + if (myGH->whichproc) + { + free (myGH->whichproc); + myGH->whichproc = NULL; + } + free (type_desc); +#endif /* MPI */ + + Util_TableDestroy (param_table_handle); + + return (retval); +} + + +/******************************************************************** + ******************** Internal Routines ************************ + ********************************************************************/ + /*@@ + @routine CreateParameterTable + @date Fri 20 Dec 2002 + @author Thomas Radke + @desc + Parses the options given in the user-supplied global parameter + table and creates a parameter table to be passed to the local + interpolator. + The table is cloned from the user-supplied local parameter table + - or created as an empty table if there is no local parameter + table - and then completed with options from the global + interpolator. + @enddesc + + @var global_param_table_handle + @vdesc handle to the user-supplied global parameter table + @vtype int + @vio in + @endvar + @var local_param_table_handle + @vdesc handle to the user-supplied local parameter table + @vtype int + @vio in + @endvar + + @returntype int + @returndesc + handle to the new local parameter table, or
+ one of the UTIL_ERROR_TABLE_* error codes + @endreturndesc +@@*/ +static int CreateParameterTable (int global_param_table_handle, + int local_param_table_handle) +{ + int retval; + + + retval = local_param_table_handle >= 0 ? + Util_TableClone (local_param_table_handle) : Util_TableCreate (0); + if (retval < 0) + { + CCTK_WARN (1, "couldn't create/clone local interpolator parameter table"); + } + + /* FIXME: must evaluate options from global parameter table */ + if (global_param_table_handle >= 0) + { + CCTK_WARN (1, "options in global interpolator parameter table are ignored " + "so far"); + } + + return (retval); +} diff --git a/src/make.code.defn b/src/make.code.defn index b8d41c1..eba2a31 100644 --- a/src/make.code.defn +++ b/src/make.code.defn @@ -2,4 +2,4 @@ # $Header$ # Source files in this directory -SRCS = Startup.c Operator.c Interpolate.c +SRCS = Startup.c Operator.c Interpolate.c InterpGridArrays.c diff --git a/src/pughInterpGH.h b/src/pughInterpGH.h index 543d1d5..2abd51b 100644 --- a/src/pughInterpGH.h +++ b/src/pughInterpGH.h @@ -32,6 +32,22 @@ typedef struct } pughInterpGH; /* point-sorted order */ +/* prototype of PUGHInterp's routine which overloads CCTK_InterpGridArrays() */ +int PUGHInterp_InterpGridArrays (const cGH *GH, + int N_dims, + int global_param_table_handle, + int local_param_table_handle, + int local_interp_handle, + int coord_system_handle, + int N_interp_points, + int interp_coords_type, + const void *const interp_coords[], + int N_input_arrays, + const CCTK_INT input_array_indices[], + int N_output_arrays, + const CCTK_INT output_array_types[], + void *const output_arrays[]); + /* prototypes of interpolation operators to be registered */ int PUGHInterp_InterpGV (cGH *GH, int order, -- cgit v1.2.3