aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoryye00 <yye00@d60812e6-3970-4df4-986e-c251b06effeb>2005-11-08 20:18:44 +0000
committeryye00 <yye00@d60812e6-3970-4df4-986e-c251b06effeb>2005-11-08 20:18:44 +0000
commita0874cffdf5002abcf7ae3b7cdd12a7bf3b4bb36 (patch)
treea9c947e9f2016a6737bf184609a243ccd768ced8
parent95ec587a0c1567950f0f2b02e1a734e0cbaa5f21 (diff)
pointwise reduction implementation in PUGHReduce
git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHReduce/trunk@77 d60812e6-3970-4df4-986e-c251b06effeb
-rw-r--r--src/ReducePointwise.c1227
1 files changed, 1227 insertions, 0 deletions
diff --git a/src/ReducePointwise.c b/src/ReducePointwise.c
new file mode 100644
index 0000000..de85e3e
--- /dev/null
+++ b/src/ReducePointwise.c
@@ -0,0 +1,1227 @@
+ /*@@
+ @file ReducePointwise.c
+ @date
+ @author Yaakoub Y El Khamra
+ @desc
+ New pointwise reduction implementation
+ @enddesc
+ @version $Header$
+ @@*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+
+#include "util_Table.h"
+#include "pugh_reductions.h"
+
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+#define SMALL_NUMBER 0
+
+static const char *rcsid = "$Id:";
+
+CCTK_FILEVERSION(CactusPUGH_PUGHReduce_ReducePointwise_c);
+
+/* local function prototypes */
+static int ReducePointwise (const cGH *GH,
+ int dest_proc,
+ int local_reduce_handle,
+ int param_table_handle,
+ int N_input_arrays,
+ const void * const input_arrays[],
+ int input_dims,
+ const CCTK_INT input_array_dims[],
+ const CCTK_INT input_array_type_codes[],
+ int M_output_values,
+ const CCTK_INT output_value_type_codes[],
+ void* const output_values[]);
+
+
+ /*@@
+ @routine PUGH_ReducePointwise
+ @date
+ @author Yaakoub El Khamra
+ @desc
+ Generic routine for doing a pointwise reduction
+ @enddesc
+ @var GH
+ @vdesc pointer to the grid hierarchy
+ @vtype cGH *
+ @vio in
+ @endvar
+ @var dest_proc
+ @vdesc the number of the processor to which we want to reduce (-1) for all-reduce
+ @vtype int
+ @vio in
+ @endvar
+ @var local_reduce_handle
+ @vdesc the handle specifying the reduction operator
+ @vtype int
+ @vio in
+ @endvar
+ @var param_reduce_handle
+ @vdesc the parameter table handle
+ @vtype int
+ @vio in
+ @endvar
+ @var N_input_arrays
+ @vdesc number of elements in the reduction input
+ @vtype int
+ @vio in
+ @endvar
+ @var input_arrays
+ @vdesc input arrays
+ @vtype const void *const []
+ @vio in
+ @endvar
+ @var input_dims
+ @vdesc number of dimensions
+ @vtype int
+ @vio in
+ @endvar
+ @var input_array_sizes
+ @vdesc sizes of the input arrays
+ @vtype const CCTK_INT []
+ @vio in
+ @endvar
+ @var input_array_type_codes
+ @vdesc types of the input arrays
+ @vtype const CCTK_INT []
+ @vio in
+ @endvar
+ @var M_output_values
+ @vdesc number of output values
+ @vtype int
+ @vio in
+ @endvar
+ @var output_value_type_codes
+ @vdesc array containing output value type codes
+ @vtype const CCTK_IN
+ @vio in
+ @endvar
+ @var output_values
+ @vdesc array of output values
+ @vtype void * const
+ @vio in
+ @endvar
+@@*/
+int PUGH_ReducePointwise (const cGH *GH,
+ int dest_proc,
+ int local_reduce_handle,
+ int param_table_handle,
+ int N_input_arrays,
+ const void * const input_arrays[],
+ int input_dims,
+ const CCTK_INT input_array_dims[],
+ const CCTK_INT input_array_type_codes[],
+ int M_output_values,
+ const CCTK_INT output_value_type_codes[],
+ void* const output_values[])
+{
+ int retval;
+ retval = ReducePointwise(GH, dest_proc, local_reduce_handle,
+ param_table_handle, N_input_arrays,
+ input_arrays, input_dims,
+ input_array_dims, input_array_type_codes,
+ M_output_values, output_value_type_codes,
+ output_values);
+ return retval;
+}
+
+
+/*****************************************************************************/
+/* local functions */
+/*****************************************************************************/
+/*@@
+ @routine ReduceGridArrays
+ @date
+ @author Yaakoub Y El Khamra
+ @desc Returns the pointwise reduction of an array
+ @enddesc
+@@*/
+static int ReducePointwise (const cGH *GH,
+ int dest_proc,
+ int local_reduce_handle,
+ int param_table_handle,
+ int N_input_arrays,
+ const void * const input_arrays[],
+ int input_dims,
+ const CCTK_INT input_array_max_dims[],
+ const CCTK_INT pointwise_input_array_type_codes[],
+ int M_output_values,
+ const CCTK_INT output_value_type_codes[],
+ void* const output_values[])
+{
+ /* Utility variables */
+ int i, ierr = 0;
+ int dim = 0;
+ int proc;
+ int num_points=0;
+ int total_num_points = 1;
+ int perform_division = 1;
+ int perform_2_root = 1;
+ int perform_3_root = 1;
+ int perform_4_root = 1;
+ int perform_all_reduce = 1;
+ pGA *GA;
+ const void *data;
+ int created_local_par_table = 0;
+
+ /* weight variables */
+ int weight_variable_index;
+ int weight_on = 0;
+ CCTK_REAL weight_sum = 0.0;
+ CCTK_REAL total_weight_sum = 1.0;
+ const void * weight;
+
+ CCTK_INT * lower_array_bounds;
+ CCTK_INT * min_array_subscript;
+ CCTK_INT * array_lsh;
+ CCTK_INT * input_array_dims;
+ CCTK_INT * input_array_type_codes;
+ CCTK_INT * input_array_gz;
+
+ /* temporary complex variables*/
+ CCTK_COMPLEX cmplx_tmp;
+ #ifdef HAVE_CCTK_COMPLEX8
+ CCTK_COMPLEX8 cmplx_tmp8;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX16
+ CCTK_COMPLEX16 cmplx_tmp16;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX32
+ CCTK_COMPLEX32 cmplx_tmp32;
+ #endif
+
+#ifdef CCTK_MPI
+ int nprocs = 0, myproc =0, global_operation = 0;
+ const pGH *pughGH = NULL;
+ void *local_outvals = NULL;
+ nprocs = CCTK_nProcs(GH);
+ myproc = CCTK_MyProc(GH);
+ if (dest_proc >=0 && dest_proc <=nprocs)
+ {
+ perform_all_reduce = 0;
+ }
+ else if (dest_proc == -1)
+ {
+ perform_all_reduce = 1;
+ }
+ else
+ {
+ CCTK_WARN (0, "PUGHReduce Destination processor for global reduction\n \
+ is out of bounds");
+ }
+#endif
+
+
+ /* prevent compiler warnings about unused parameters */
+ (void) (GH + 0);
+
+ /* allocate memory for type codes and input array pointer */
+ input_array_type_codes = (CCTK_INT *)malloc (N_input_arrays *sizeof(CCTK_INT));
+ input_arrays = (CCTK_POINTER) malloc (N_input_arrays *sizeof(CCTK_POINTER));
+
+ /* set the number of dimensions */
+ dim = input_dims;
+
+ /* allocate memory for the array of the dimensions of the input arrays */
+ lower_array_bounds = (CCTK_INT *)malloc (dim *sizeof(CCTK_INT));
+ min_array_subscript = (CCTK_INT *)malloc (dim *sizeof(CCTK_INT));
+ array_lsh = (CCTK_INT *)malloc (dim *sizeof(CCTK_INT));
+ input_array_dims = (CCTK_INT *)malloc (dim *sizeof(CCTK_INT));
+ input_array_gz = (CCTK_INT *)malloc (dim *sizeof(CCTK_INT));
+
+ /* find out the types of the input arrays and put that */
+ /* in an array ipughGH->GFExtras[dim]->lnsize[i]*/
+ for ( i = 0; i < N_input_arrays; i++)
+ {
+ input_array_type_codes[i] = pointwise_input_array_type_codes[i];
+ }
+
+ /* calculate index ranges */
+ for ( i=0; i < dim; i++)
+ {
+ /* get the start- and endpoint to iterate over in this dimension */
+ min_array_subscript[i] = 0;
+ array_lsh[i] = input_array_max_dims[i];
+ input_array_dims[i] = input_array_max_dims[i];
+ }
+
+ /* Create the parameter table if it is not there and add the bounds to it */
+ if ( Util_TableQueryNKeys(param_table_handle) < 0)
+ {
+ created_local_par_table = 1;
+ param_table_handle = Util_TableCreate (UTIL_TABLE_FLAGS_DEFAULT);
+ ierr = Util_TableSetGenericArray (param_table_handle, CCTK_VARIABLE_INT, dim,min_array_subscript, "input_array_min_subscripts");
+ ierr = Util_TableSetGenericArray (param_table_handle, CCTK_VARIABLE_INT, dim,array_lsh, "input_array_max_subscripts");
+ }
+ else
+ {
+ if (!Util_TableQueryValueInfo(param_table_handle, NULL, NULL, "input_array_min_subscripts"))
+ {
+ ierr = Util_TableSetGenericArray (param_table_handle, CCTK_VARIABLE_INT, dim,min_array_subscript, "input_array_min_subscripts");
+ }
+ if (!Util_TableQueryValueInfo(param_table_handle, NULL, NULL, "input_array_max_subscripts"))
+ {
+ ierr = Util_TableSetGenericArray (param_table_handle, CCTK_VARIABLE_INT, dim,array_lsh, "input_array_max_subscripts");
+ }
+ }
+
+ /* Set flag to tell local reduction not to divide by num_points */
+ ierr = Util_TableSetInt(param_table_handle, 1, "global_calling");
+
+ /* Set the weight variable from the weight variable index */
+ ierr = Util_TableGetInt(param_table_handle, &weight_variable_index, "weight_variable_index");
+ if (ierr == 1)
+ {
+ weight = CCTK_VarDataPtrI(GH, 0, weight_variable_index);
+ ierr = Util_TableSetPointerToConst(param_table_handle, weight, "weight");
+ }
+ else
+ {
+ weight = NULL;
+ ierr = Util_TableSetInt(param_table_handle, 0, "weight_on");
+ }
+
+ ierr = CCTK_ReduceLocalArrays(dim, local_reduce_handle, param_table_handle,
+ N_input_arrays, input_array_dims, input_array_type_codes,
+ (void *)input_arrays, M_output_values, output_value_type_codes, output_values);
+
+ /* Get flag to perform division by number of points or not, number_of_points and weight_sum */
+ ierr = Util_TableGetInt(param_table_handle, &perform_division, "perform_division");
+ ierr = Util_TableGetInt(param_table_handle, &num_points, "num_points");
+ ierr = Util_TableGetReal(param_table_handle, &weight_sum, "weight_sum");
+ ierr = Util_TableGetInt(param_table_handle, &weight_on, "weight_on");
+
+ /* Get flags to perform root operations */
+ ierr = Util_TableGetInt(param_table_handle, &perform_2_root, "perform_2_root");
+ ierr = Util_TableGetInt(param_table_handle, &perform_3_root, "perform_3_root");
+ ierr = Util_TableGetInt(param_table_handle, &perform_4_root, "perform_4_root");
+
+#ifdef CCTK_MPI
+ pughGH = PUGH_pGH (GH);
+ ierr = Util_TableGetInt(param_table_handle, &proc, "proc");
+ ierr = Util_TableGetInt(param_table_handle, &global_operation, "global_operation");
+
+ if (perform_all_reduce)
+ {
+ CACTUS_MPI_ERROR (MPI_Allreduce (&num_points, &total_num_points, 1,
+ PUGH_MPI_INT, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ CACTUS_MPI_ERROR (MPI_Allreduce (&weight_sum, &total_weight_sum, 1,
+ PUGH_MPI_REAL, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ CACTUS_MPI_ERROR (MPI_Reduce (&num_points, &total_num_points, 1,
+ PUGH_MPI_INT, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ CACTUS_MPI_ERROR (MPI_Reduce (&weight_sum, &total_weight_sum, 1,
+ PUGH_MPI_REAL, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ for (i = 0; i< M_output_values; i++)
+ {
+ switch (output_value_type_codes[i])
+ {
+ /* out values type switches*/
+ case CCTK_VARIABLE_BYTE:
+ local_outvals = malloc (1 * sizeof (CCTK_BYTE));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_BYTE *) output_values[i], 1 * sizeof (CCTK_BYTE));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_BYTE *) output_values[i], 1,
+ PUGH_MPI_BYTE, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_BYTE *) output_values[i], 1,
+ PUGH_MPI_BYTE, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_BYTE *) output_values[i], 1,
+ PUGH_MPI_BYTE, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_BYTE *) output_values[i], 1,
+ PUGH_MPI_BYTE, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_BYTE *) output_values[i], 1,
+ PUGH_MPI_BYTE, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_BYTE *) output_values[i], 1,
+ PUGH_MPI_BYTE, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ case CCTK_VARIABLE_INT:
+ local_outvals = malloc (1 * sizeof (CCTK_INT));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_INT *) output_values[i], 1 * sizeof (CCTK_INT));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1,
+ PUGH_MPI_INT, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1,
+ PUGH_MPI_INT, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1,
+ PUGH_MPI_INT, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT *) output_values[i], 1,
+ PUGH_MPI_INT, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT *) output_values[i], 1,
+ PUGH_MPI_INT, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT *) output_values[i], 1,
+ PUGH_MPI_INT, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #ifdef HAVE_CCTK_INT1
+ case CCTK_VARIABLE_INT1:
+ local_outvals = malloc (1 * sizeof (CCTK_INT1));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_INT1 *) output_values[i], 1 * sizeof (CCTK_INT1));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1,
+ PUGH_MPI_INT1, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1,
+ PUGH_MPI_INT1, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1,
+ PUGH_MPI_INT1, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT1 *) output_values[i], 1,
+ PUGH_MPI_INT1, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT1 *) output_values[i], 1,
+ PUGH_MPI_INT1, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT1 *) output_values[i], 1,
+ PUGH_MPI_INT1, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT2
+ case CCTK_VARIABLE_INT2:
+ local_outvals = malloc (1 * sizeof (CCTK_INT2));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_INT2 *) output_values[i], 1 * sizeof (CCTK_INT2));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1,
+ PUGH_MPI_INT2, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1,
+ PUGH_MPI_INT2, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1,
+ PUGH_MPI_INT2, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT2 *) output_values[i], 1,
+ PUGH_MPI_INT2, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT2 *) output_values[i], 1,
+ PUGH_MPI_INT2, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT2 *) output_values[i], 1,
+ PUGH_MPI_INT2, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT4
+ case CCTK_VARIABLE_INT4:
+ local_outvals = malloc (1 * sizeof (CCTK_INT4));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_INT4 *) output_values[i], 1 * sizeof (CCTK_INT4));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1,
+ PUGH_MPI_INT4, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1,
+ PUGH_MPI_INT4, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1,
+ PUGH_MPI_INT4, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT4 *) output_values[i], 1,
+ PUGH_MPI_INT4, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT4 *) output_values[i], 1,
+ PUGH_MPI_INT4, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT4 *) output_values[i], 1,
+ PUGH_MPI_INT4, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT8
+ case CCTK_VARIABLE_INT8:
+ local_outvals = malloc (1 * sizeof (CCTK_INT8));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_INT8 *) output_values[i], 1 * sizeof (CCTK_INT8));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1,
+ PUGH_MPI_INT8, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1,
+ PUGH_MPI_INT8, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1,
+ PUGH_MPI_INT8, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT8 *) output_values[i], 1,
+ PUGH_MPI_INT8, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT8 *) output_values[i], 1,
+ PUGH_MPI_INT8, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT8 *) output_values[i], 1,
+ PUGH_MPI_INT8, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+
+ break;
+ #endif
+ case CCTK_VARIABLE_REAL:
+ local_outvals = malloc (1 * sizeof (CCTK_REAL));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_REAL *) output_values[i], 1 * sizeof (CCTK_REAL));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1,
+ PUGH_MPI_REAL, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1,
+ PUGH_MPI_REAL, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1,
+ PUGH_MPI_REAL, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL *) output_values[i], 1,
+ PUGH_MPI_REAL, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL *) output_values[i], 1,
+ PUGH_MPI_REAL, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL *) output_values[i], 1,
+ PUGH_MPI_REAL, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #ifdef HAVE_CCTK_REAL4
+ case CCTK_VARIABLE_REAL4:
+ local_outvals = malloc (1 * sizeof (CCTK_REAL4));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_REAL4 *) output_values[i], 1 * sizeof (CCTK_REAL4));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1,
+ PUGH_MPI_REAL4, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1,
+ PUGH_MPI_REAL4, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1,
+ PUGH_MPI_REAL4, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1,
+ PUGH_MPI_REAL4, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1,
+ PUGH_MPI_REAL4, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1,
+ PUGH_MPI_REAL4, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL8
+ case CCTK_VARIABLE_REAL8:
+ local_outvals = malloc (1 * sizeof (CCTK_REAL8));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_REAL8 *) output_values[i], 1 * sizeof (CCTK_REAL8));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1,
+ PUGH_MPI_REAL8, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1,
+ PUGH_MPI_REAL8, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1,
+ PUGH_MPI_REAL8, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1,
+ PUGH_MPI_REAL8, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1,
+ PUGH_MPI_REAL8, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1,
+ PUGH_MPI_REAL8, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL16
+ case CCTK_VARIABLE_REAL16:
+ local_outvals = malloc (1 * sizeof (CCTK_REAL16));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_REAL16 *) output_values[i], 1 * sizeof (CCTK_REAL16));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1,
+ PUGH_MPI_REAL16, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1,
+ PUGH_MPI_REAL16, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1,
+ PUGH_MPI_REAL16, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1,
+ PUGH_MPI_REAL16, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1,
+ PUGH_MPI_REAL16, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1,
+ PUGH_MPI_REAL16, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ case CCTK_VARIABLE_COMPLEX:
+ local_outvals = malloc (1 * sizeof (CCTK_COMPLEX));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_COMPLEX *) output_values[i], 1 * sizeof (CCTK_COMPLEX));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #ifdef HAVE_CCTK_COMPLEX8
+ case CCTK_VARIABLE_COMPLEX8:
+ local_outvals = malloc (1 * sizeof (CCTK_COMPLEX8));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1 * sizeof (CCTK_COMPLEX8));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex8, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex8, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex8, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex8, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex8, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex8, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX16
+ case CCTK_VARIABLE_COMPLEX16:
+ local_outvals = malloc (1 * sizeof (CCTK_COMPLEX16));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1 * sizeof (CCTK_COMPLEX16));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex16, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex16, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex16, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex16, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex16, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex16, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX32
+ case CCTK_VARIABLE_COMPLEX32:
+ local_outvals = malloc (1 * sizeof (CCTK_COMPLEX32));
+
+ /* outvals[] contains now the local reduction values */
+ memcpy ( local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1 * sizeof (CCTK_COMPLEX32));
+
+ if (perform_all_reduce)
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex32, MPI_MAX, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex32, MPI_MIN, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex32, MPI_SUM, pughGH->PUGH_COMM_WORLD));
+ }
+ else
+ {
+ if (global_operation == 1)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex32, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 2)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex32, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD));
+ else if (global_operation == 3)
+ CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1,
+ pughGH->PUGH_mpi_complex32, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD));
+ }
+ break;
+ #endif
+ }
+ }
+ num_points = total_num_points;
+ weight_sum = total_weight_sum;
+ free (local_outvals);
+#endif
+
+ if (perform_division == 0)
+ {
+ if (weight_on == 1)
+ {
+ if (ABS(weight_sum) > SMALL_NUMBER)
+ {
+ for (i = 0; i< M_output_values; i++)
+ {
+ switch (output_value_type_codes[i])
+ {
+ /* out values type switches*/
+ case CCTK_VARIABLE_BYTE:
+ *( (CCTK_BYTE *) output_values[i]) = *( (CCTK_BYTE *) output_values[i]) / weight_sum;
+ break;
+ case CCTK_VARIABLE_INT:
+ *( (CCTK_INT *) output_values[i]) = *( (CCTK_INT *) output_values[i]) / weight_sum;
+ break;
+ #ifdef HAVE_CCTK_INT1
+ case CCTK_VARIABLE_INT1:
+ *( (CCTK_INT1 *) output_values[i]) = *( (CCTK_INT1 *) output_values[i]) / weight_sum;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT2
+ case CCTK_VARIABLE_INT2:
+ *( (CCTK_INT2 *) output_values[i]) = *( (CCTK_INT2 *) output_values[i]) / weight_sum;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT4
+ case CCTK_VARIABLE_INT4:
+ *( (CCTK_INT4 *) output_values[i]) = *( (CCTK_INT4 *) output_values[i]) / weight_sum;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT8
+ case CCTK_VARIABLE_INT8:
+ *( (CCTK_INT8 *) output_values[i]) = *( (CCTK_INT8 *) output_values[i]) / weight_sum;
+ break;
+ #endif
+ case CCTK_VARIABLE_REAL:
+ *( (CCTK_REAL *) output_values[i]) = *( (CCTK_REAL *) output_values[i]) / weight_sum;
+ break;
+ #ifdef HAVE_CCTK_REAL4
+ case CCTK_VARIABLE_REAL4:
+ *( (CCTK_REAL4 *) output_values[i]) = *( (CCTK_REAL4 *) output_values[i]) / weight_sum;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL8
+ case CCTK_VARIABLE_REAL8:
+ *( (CCTK_REAL8 *) output_values[i]) = *( (CCTK_REAL8 *) output_values[i]) / weight_sum;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL16
+ case CCTK_VARIABLE_REAL16:
+ *( (CCTK_REAL16 *) output_values[i]) = *( (CCTK_REAL16 *) output_values[i]) / weight_sum;
+ break;
+ #endif
+ case CCTK_VARIABLE_COMPLEX:
+ (*( (CCTK_COMPLEX *) output_values[i])).Re = (*( (CCTK_COMPLEX *) output_values[i])).Re / weight_sum;
+ (*( (CCTK_COMPLEX *) output_values[i])).Im = (*( (CCTK_COMPLEX *) output_values[i])).Im / weight_sum;
+ break;
+ #ifdef HAVE_CCTK_COMPLEX8
+ case CCTK_VARIABLE_COMPLEX8:
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Re = (*( (CCTK_COMPLEX8 *) output_values[i])).Re / weight_sum;
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Im = (*( (CCTK_COMPLEX8 *) output_values[i])).Im / weight_sum;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX16
+ case CCTK_VARIABLE_COMPLEX16:
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Re = (*( (CCTK_COMPLEX16 *) output_values[i])).Re / weight_sum;
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Im = (*( (CCTK_COMPLEX16 *) output_values[i])).Im / weight_sum;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX32
+ case CCTK_VARIABLE_COMPLEX32:
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Re = (*( (CCTK_COMPLEX32 *) output_values[i])).Re / weight_sum;
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Im = (*( (CCTK_COMPLEX32 *) output_values[i])).Im / weight_sum;
+ break;
+ #endif
+ }
+ }
+ }
+ else
+ {
+ CCTK_WARN (1, "The sum of weights in average reduction is zero");
+ return (-1);
+ }
+ }
+ else
+ {
+ for (i = 0; i< M_output_values; i++)
+ {
+ switch (output_value_type_codes[i])
+ {
+ /* out values type switches*/
+ case CCTK_VARIABLE_BYTE:
+ *( (CCTK_BYTE *) output_values[i]) = *( (CCTK_BYTE *) output_values[i]) / num_points;
+ break;
+ case CCTK_VARIABLE_INT:
+ *( (CCTK_INT *) output_values[i]) = *( (CCTK_INT *) output_values[i]) / num_points;
+ break;
+ #ifdef HAVE_CCTK_INT1
+ case CCTK_VARIABLE_INT1:
+ *( (CCTK_INT1 *) output_values[i]) = *( (CCTK_INT1 *) output_values[i]) / num_points;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT2
+ case CCTK_VARIABLE_INT2:
+ *( (CCTK_INT2 *) output_values[i]) = *( (CCTK_INT2 *) output_values[i]) / num_points;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT4
+ case CCTK_VARIABLE_INT4:
+ *( (CCTK_INT4 *) output_values[i]) = *( (CCTK_INT4 *) output_values[i]) / num_points;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT8
+ case CCTK_VARIABLE_INT8:
+ *( (CCTK_INT8 *) output_values[i]) = *( (CCTK_INT8 *) output_values[i]) / num_points;
+ break;
+ #endif
+ case CCTK_VARIABLE_REAL:
+ *( (CCTK_REAL *) output_values[i]) = *( (CCTK_REAL *) output_values[i]) / num_points;
+ break;
+ #ifdef HAVE_CCTK_REAL4
+ case CCTK_VARIABLE_REAL4:
+ *( (CCTK_REAL4 *) output_values[i]) = *( (CCTK_REAL4 *) output_values[i]) / num_points;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL8
+ case CCTK_VARIABLE_REAL8:
+ *( (CCTK_REAL8 *) output_values[i]) = *( (CCTK_REAL8 *) output_values[i]) / num_points;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL16
+ case CCTK_VARIABLE_REAL16:
+ *( (CCTK_REAL16 *) output_values[i]) = *( (CCTK_REAL16 *) output_values[i]) / num_points;
+ break;
+ #endif
+ case CCTK_VARIABLE_COMPLEX:
+ (*( (CCTK_COMPLEX *) output_values[i])).Re = (*( (CCTK_COMPLEX *) output_values[i])).Re / num_points;
+ (*( (CCTK_COMPLEX *) output_values[i])).Im = (*( (CCTK_COMPLEX *) output_values[i])).Im / num_points;
+ break;
+ #ifdef HAVE_CCTK_COMPLEX8
+ case CCTK_VARIABLE_COMPLEX8:
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Re = (*( (CCTK_COMPLEX8 *) output_values[i])).Re / num_points;
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Im = (*( (CCTK_COMPLEX8 *) output_values[i])).Im / num_points;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX16
+ case CCTK_VARIABLE_COMPLEX16:
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Re = (*( (CCTK_COMPLEX16 *) output_values[i])).Re / num_points;
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Im = (*( (CCTK_COMPLEX16 *) output_values[i])).Im / num_points;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX32
+ case CCTK_VARIABLE_COMPLEX32:
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Re = (*( (CCTK_COMPLEX32 *) output_values[i])).Re / num_points;
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Im = (*( (CCTK_COMPLEX32 *) output_values[i])).Im / num_points;
+ break;
+ #endif
+ }
+ }
+ }
+ }
+
+ if (perform_2_root == 0)
+ {
+ for (i = 0; i< M_output_values; i++)
+ {
+ switch (output_value_type_codes[i])
+ {
+ /* out values type switches*/
+ case CCTK_VARIABLE_BYTE:
+ *( (CCTK_BYTE *) output_values[i]) = sqrt(*( (CCTK_BYTE *) output_values[i]));
+ break;
+ case CCTK_VARIABLE_INT:
+ *( (CCTK_INT *) output_values[i]) = sqrt( *( (CCTK_INT *) output_values[i]));
+ break;
+ #ifdef HAVE_CCTK_INT1
+ case CCTK_VARIABLE_INT1:
+ *( (CCTK_INT1 *) output_values[i]) = sqrt(*( (CCTK_INT1 *) output_values[i]));
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT2
+ case CCTK_VARIABLE_INT2:
+ *( (CCTK_INT2 *) output_values[i]) = sqrt(*( (CCTK_INT2 *) output_values[i]));
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT4
+ case CCTK_VARIABLE_INT4:
+ *( (CCTK_INT4 *) output_values[i]) = sqrt( *( (CCTK_INT4 *) output_values[i]));
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT8
+ case CCTK_VARIABLE_INT8:
+ *( (CCTK_INT8 *) output_values[i]) = sqrt(*( (CCTK_INT8 *) output_values[i]));
+ break;
+ #endif
+ case CCTK_VARIABLE_REAL:
+ *( (CCTK_REAL *) output_values[i]) = sqrt(*( (CCTK_REAL *) output_values[i]));
+ break;
+ #ifdef HAVE_CCTK_REAL4
+ case CCTK_VARIABLE_REAL4:
+ *( (CCTK_REAL4 *) output_values[i]) = sqrt(*( (CCTK_REAL4 *) output_values[i]));
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL8
+ case CCTK_VARIABLE_REAL8:
+ *( (CCTK_REAL8 *) output_values[i]) = sqrt(*( (CCTK_REAL8 *) output_values[i]));
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL16
+ case CCTK_VARIABLE_REAL16:
+ *( (CCTK_REAL16 *) output_values[i]) = sqrt(*( (CCTK_REAL16 *) output_values[i]));
+ break;
+ #endif
+ case CCTK_VARIABLE_COMPLEX:
+ cmplx_tmp.Re = (*( (CCTK_COMPLEX *) output_values[i])).Re;
+ cmplx_tmp.Im = (*( (CCTK_COMPLEX *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/2.0);
+ (*( (CCTK_COMPLEX *) output_values[i])).Re = cmplx_tmp.Re;
+ (*( (CCTK_COMPLEX *) output_values[i])).Im = cmplx_tmp.Im;
+ break;
+ #ifdef HAVE_CCTK_COMPLEX8
+ case CCTK_VARIABLE_COMPLEX8:
+ cmplx_tmp8.Re = (*( (CCTK_COMPLEX8 *) output_values[i])).Re;
+ cmplx_tmp8.Im = (*( (CCTK_COMPLEX8 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/2.0);
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Re = cmplx_tmp8.Re;
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Im = cmplx_tmp8.Im;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX16
+ case CCTK_VARIABLE_COMPLEX16:
+ cmplx_tmp16.Re = (*( (CCTK_COMPLEX16 *) output_values[i])).Re;
+ cmplx_tmp16.Im = (*( (CCTK_COMPLEX16 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/2.0);
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Re = cmplx_tmp16.Re;
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Im = cmplx_tmp16.Im;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX32
+ case CCTK_VARIABLE_COMPLEX32:
+ cmplx_tmp32.Re = (*( (CCTK_COMPLEX32 *) output_values[i])).Re;
+ cmplx_tmp32.Im = (*( (CCTK_COMPLEX32 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/2.0);
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Re = cmplx_tmp32.Re;
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Im = cmplx_tmp32.Im;
+ break;
+ #endif
+ }
+ }
+ }
+
+ if (perform_3_root == 0)
+ {
+ for (i = 0; i< M_output_values; i++)
+ {
+ switch (output_value_type_codes[i])
+ {
+ /* out values type switches*/
+ case CCTK_VARIABLE_BYTE:
+ *( (CCTK_BYTE *) output_values[i]) = pow(*( (CCTK_BYTE *) output_values[i]), 1.0/3.0);
+ break;
+ case CCTK_VARIABLE_INT:
+ *( (CCTK_INT *) output_values[i]) = pow(*( (CCTK_INT *) output_values[i]), 1.0/3.0);
+ break;
+ #ifdef HAVE_CCTK_INT1
+ case CCTK_VARIABLE_INT1:
+ *( (CCTK_INT1 *) output_values[i]) = pow(*( (CCTK_INT1 *) output_values[i]), 1.0/3.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT2
+ case CCTK_VARIABLE_INT2:
+ *( (CCTK_INT2 *) output_values[i]) = pow(*( (CCTK_INT2 *) output_values[i]), 1.0/3.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT4
+ case CCTK_VARIABLE_INT4:
+ *( (CCTK_INT4 *) output_values[i]) = pow( *( (CCTK_INT4 *) output_values[i]), 1.0/3.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT8
+ case CCTK_VARIABLE_INT8:
+ *( (CCTK_INT8 *) output_values[i]) = pow(*( (CCTK_INT8 *) output_values[i]), 1.0/3.0);
+ break;
+ #endif
+ case CCTK_VARIABLE_REAL:
+ *( (CCTK_REAL *) output_values[i]) = pow(*( (CCTK_REAL *) output_values[i]), 1.0/3.0);
+ break;
+ #ifdef HAVE_CCTK_REAL4
+ case CCTK_VARIABLE_REAL4:
+ *( (CCTK_REAL4 *) output_values[i]) = pow(*( (CCTK_REAL4 *) output_values[i]), 1.0/3.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL8
+ case CCTK_VARIABLE_REAL8:
+ *( (CCTK_REAL8 *) output_values[i]) = pow(*( (CCTK_REAL8 *) output_values[i]), 1.0/3.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL16
+ case CCTK_VARIABLE_REAL16:
+ *( (CCTK_REAL16 *) output_values[i]) = pow(*( (CCTK_REAL16 *) output_values[i]), 1.0/3.0);
+ break;
+ #endif
+ case CCTK_VARIABLE_COMPLEX:
+ cmplx_tmp.Re = (*( (CCTK_COMPLEX *) output_values[i])).Re;
+ cmplx_tmp.Im = (*( (CCTK_COMPLEX *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/3.0);
+ (*( (CCTK_COMPLEX *) output_values[i])).Re = cmplx_tmp.Re;
+ (*( (CCTK_COMPLEX *) output_values[i])).Im = cmplx_tmp.Im;
+ break;
+ #ifdef HAVE_CCTK_COMPLEX8
+ case CCTK_VARIABLE_COMPLEX8:
+ cmplx_tmp8.Re = (*( (CCTK_COMPLEX8 *) output_values[i])).Re;
+ cmplx_tmp8.Im = (*( (CCTK_COMPLEX8 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/3.0);
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Re = cmplx_tmp8.Re;
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Im = cmplx_tmp8.Im;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX16
+ case CCTK_VARIABLE_COMPLEX16:
+ cmplx_tmp16.Re = (*( (CCTK_COMPLEX16 *) output_values[i])).Re;
+ cmplx_tmp16.Im = (*( (CCTK_COMPLEX16 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/3.0);
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Re = cmplx_tmp16.Re;
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Im = cmplx_tmp16.Im;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX32
+ case CCTK_VARIABLE_COMPLEX32:
+ cmplx_tmp32.Re = (*( (CCTK_COMPLEX32 *) output_values[i])).Re;
+ cmplx_tmp32.Im = (*( (CCTK_COMPLEX32 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/3.0);
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Re = cmplx_tmp32.Re;
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Im = cmplx_tmp32.Im;
+ break;
+ #endif
+ }
+ }
+ }
+
+ if (perform_4_root == 0)
+ {
+ for (i = 0; i< M_output_values; i++)
+ {
+ switch (output_value_type_codes[i])
+ {
+ /* out values type switches*/
+ case CCTK_VARIABLE_BYTE:
+ *( (CCTK_BYTE *) output_values[i]) = pow(*( (CCTK_BYTE *) output_values[i]), 1.0/4.0);
+ break;
+ case CCTK_VARIABLE_INT:
+ *( (CCTK_INT *) output_values[i]) = pow(*( (CCTK_INT *) output_values[i]), 1.0/4.0);
+ break;
+ #ifdef HAVE_CCTK_INT1
+ case CCTK_VARIABLE_INT1:
+ *( (CCTK_INT1 *) output_values[i]) = pow(*( (CCTK_INT1 *) output_values[i]), 1.0/4.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT2
+ case CCTK_VARIABLE_INT2:
+ *( (CCTK_INT2 *) output_values[i]) = pow(*( (CCTK_INT2 *) output_values[i]), 1.0/4.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT4
+ case CCTK_VARIABLE_INT4:
+ *( (CCTK_INT4 *) output_values[i]) = pow( *( (CCTK_INT4 *) output_values[i]), 1.0/4.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_INT8
+ case CCTK_VARIABLE_INT8:
+ *( (CCTK_INT8 *) output_values[i]) = pow(*( (CCTK_INT8 *) output_values[i]), 1.0/4.0);
+ break;
+ #endif
+ case CCTK_VARIABLE_REAL:
+ *( (CCTK_REAL *) output_values[i]) = pow(*( (CCTK_REAL *) output_values[i]), 1.0/4.0);
+ break;
+ #ifdef HAVE_CCTK_REAL4
+ case CCTK_VARIABLE_REAL4:
+ *( (CCTK_REAL4 *) output_values[i]) = pow(*( (CCTK_REAL4 *) output_values[i]), 1.0/4.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL8
+ case CCTK_VARIABLE_REAL8:
+ *( (CCTK_REAL8 *) output_values[i]) = pow(*( (CCTK_REAL8 *) output_values[i]), 1.0/4.0);
+ break;
+ #endif
+ #ifdef HAVE_CCTK_REAL16
+ case CCTK_VARIABLE_REAL16:
+ *( (CCTK_REAL16 *) output_values[i]) = pow(*( (CCTK_REAL16 *) output_values[i]), 1.0/4.0);
+ break;
+ #endif
+ case CCTK_VARIABLE_COMPLEX:
+ cmplx_tmp.Re = (*( (CCTK_COMPLEX *) output_values[i])).Re;
+ cmplx_tmp.Im = (*( (CCTK_COMPLEX *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/4.0);
+ (*( (CCTK_COMPLEX *) output_values[i])).Re = cmplx_tmp.Re;
+ (*( (CCTK_COMPLEX *) output_values[i])).Im = cmplx_tmp.Im;
+ break;
+ #ifdef HAVE_CCTK_COMPLEX8
+ case CCTK_VARIABLE_COMPLEX8:
+ cmplx_tmp8.Re = (*( (CCTK_COMPLEX8 *) output_values[i])).Re;
+ cmplx_tmp8.Im = (*( (CCTK_COMPLEX8 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/4.0);
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Re = cmplx_tmp8.Re;
+ (*( (CCTK_COMPLEX8 *) output_values[i])).Im = cmplx_tmp8.Im;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX16
+ case CCTK_VARIABLE_COMPLEX16:
+ cmplx_tmp16.Re = (*( (CCTK_COMPLEX16 *) output_values[i])).Re;
+ cmplx_tmp16.Im = (*( (CCTK_COMPLEX16 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/4.0);
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Re = cmplx_tmp16.Re;
+ (*( (CCTK_COMPLEX16 *) output_values[i])).Im = cmplx_tmp16.Im;
+ break;
+ #endif
+ #ifdef HAVE_CCTK_COMPLEX32
+ case CCTK_VARIABLE_COMPLEX32:
+ cmplx_tmp32.Re = (*( (CCTK_COMPLEX32 *) output_values[i])).Re;
+ cmplx_tmp32.Im = (*( (CCTK_COMPLEX32 *) output_values[i])).Im;
+ cmplx_tmp = CCTK_CmplxPow(cmplx_tmp, 1.0/4.0);
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Re = cmplx_tmp32.Re;
+ (*( (CCTK_COMPLEX32 *) output_values[i])).Im = cmplx_tmp32.Im;
+ break;
+ #endif
+ }
+ }
+ }
+
+ if(created_local_par_table)
+ {
+ ierr = Util_TableDestroy(param_table_handle);
+ }
+
+ return ierr;
+}