From e677e4043ec45f225ebb3a0b2f7009532cf42766 Mon Sep 17 00:00:00 2001 From: yye00 Date: Tue, 19 Oct 2004 16:56:13 +0000 Subject: added mpi_reduce calls, made the processor number an argument in grid array reduction git-svn-id: http://svn.cactuscode.org/arrangements/CactusPUGH/PUGHReduce/trunk@55 d60812e6-3970-4df4-986e-c251b06effeb --- src/ReduceGA.c | 500 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 370 insertions(+), 130 deletions(-) diff --git a/src/ReduceGA.c b/src/ReduceGA.c index c9c0311..fcd3c86 100644 --- a/src/ReduceGA.c +++ b/src/ReduceGA.c @@ -21,6 +21,7 @@ CCTK_FILEVERSION(CactusPUGH_PUGHReduce_ReduceGA_c); /* local function prototypes */ static int ReduceGridArrays (const cGH *GH, + int dest_proc, int local_reduce_handle, int param_table_handle, int N_input_arrays, @@ -43,6 +44,11 @@ static int ReduceGridArrays (const cGH *GH, @vtype cGH * @vio in @endvar + @var dest_proc + @vdesc the number of the processor to which we want to reduce, or -1 for an all-reduce + @vtype int + @vio in + @endvar @var local_reduce_handle @vdesc the handle specifying the reduction operator @vtype int @@ -80,6 +86,7 @@ static int ReduceGridArrays (const cGH *GH, @endvar @@*/ int PUGH_ReduceGridArrays (const cGH *GH, + int dest_proc, int local_reduce_handle, int param_table_handle, int N_input_arrays, @@ -89,7 +96,7 @@ int PUGH_ReduceGridArrays (const cGH *GH, void* const output_values[]) { int retval; - retval = ReduceGridArrays(GH, local_reduce_handle, + retval = ReduceGridArrays(GH, dest_proc, local_reduce_handle, param_table_handle, N_input_arrays, input_array_variable_indices, M_output_values, output_value_type_codes, @@ -111,6 +118,7 @@ int PUGH_ReduceGridArrays (const cGH *GH, @enddesc @@*/ static int ReduceGridArrays (const cGH *GH, + int dest_proc, int local_reduce_handle, int param_table_handle, int N_input_arrays, @@ -124,8 +132,9 @@ static int ReduceGridArrays (const cGH *GH, int dim = 0; int proc; int num_points=0; - int total_num_points = 
0; + int total_num_points = 1; int perform_division = 1; + int perform_all_reduce = 1; CCTK_INT * lower_array_bounds; CCTK_INT * upper_array_bounds; @@ -140,6 +149,19 @@ static int ReduceGridArrays (const cGH *GH, void *local_outvals = NULL; nprocs = CCTK_nProcs(GH); myproc = CCTK_MyProc(GH); + if (dest_proc >=0 && dest_proc <=nprocs) + { + perform_all_reduce = 0; + } + else if (dest_proc == -1) + { + perform_all_reduce = 1; + } + else + { + CCTK_WARN (0, "PUGHReduce Destination processor for global reduction\n \ + is out of bounds"); + } #endif @@ -216,8 +238,16 @@ static int ReduceGridArrays (const cGH *GH, ierr = Util_TableGetInt(param_table_handle, &proc, "proc"); ierr = Util_TableGetInt(param_table_handle, &global_operation, "global_operation"); - CACTUS_MPI_ERROR (MPI_Allreduce (&num_points, &total_num_points, 1, + if (perform_all_reduce) + { + CACTUS_MPI_ERROR (MPI_Allreduce (&num_points, &total_num_points, 1, PUGH_MPI_INT, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + CACTUS_MPI_ERROR (MPI_Reduce (&num_points, &total_num_points, 1, + PUGH_MPI_INT, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } for (i = 0; i< M_output_values; i++) { @@ -230,15 +260,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_CHAR *) output_values[i], 1 * sizeof (CCTK_CHAR)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, - PUGH_MPI_CHAR, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, - PUGH_MPI_CHAR, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, - PUGH_MPI_CHAR, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, 
(CCTK_CHAR *) output_values[i], 1, + PUGH_MPI_CHAR, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, + PUGH_MPI_CHAR, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, + PUGH_MPI_CHAR, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, + PUGH_MPI_CHAR, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, + PUGH_MPI_CHAR, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_CHAR *) output_values[i], 1, + PUGH_MPI_CHAR, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; case CCTK_VARIABLE_INT: local_outvals = malloc (1 * sizeof (CCTK_INT)); @@ -246,15 +291,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_INT *) output_values[i], 1 * sizeof (CCTK_INT)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1, - PUGH_MPI_INT, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1, - PUGH_MPI_INT, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1, - PUGH_MPI_INT, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1, + PUGH_MPI_INT, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) 
+ CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1, + PUGH_MPI_INT, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT *) output_values[i], 1, + PUGH_MPI_INT, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT *) output_values[i], 1, + PUGH_MPI_INT, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT *) output_values[i], 1, + PUGH_MPI_INT, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT *) output_values[i], 1, + PUGH_MPI_INT, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #ifdef CCTK_INT1 case CCTK_VARIABLE_INT1: @@ -263,15 +323,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_INT1 *) output_values[i], 1 * sizeof (CCTK_INT1)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, - PUGH_MPI_INT1, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, - PUGH_MPI_INT1, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, - PUGH_MPI_INT1, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, + PUGH_MPI_INT1, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, + PUGH_MPI_INT1, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if 
(global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, + PUGH_MPI_INT1, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, + PUGH_MPI_INT1, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, + PUGH_MPI_INT1, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT1 *) output_values[i], 1, + PUGH_MPI_INT1, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif #ifdef CCTK_INT2 @@ -281,15 +356,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_INT2 *) output_values[i], 1 * sizeof (CCTK_INT2)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, - PUGH_MPI_INT2, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, - PUGH_MPI_INT2, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, - PUGH_MPI_INT2, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, + PUGH_MPI_INT2, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, + PUGH_MPI_INT2, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, + PUGH_MPI_INT2, MPI_SUM, 
pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, + PUGH_MPI_INT2, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, + PUGH_MPI_INT2, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT2 *) output_values[i], 1, + PUGH_MPI_INT2, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif #ifdef CCTK_INT4 @@ -299,15 +389,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_INT4 *) output_values[i], 1 * sizeof (CCTK_INT4)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, - PUGH_MPI_INT4, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, - PUGH_MPI_INT4, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, - PUGH_MPI_INT4, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, + PUGH_MPI_INT4, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, + PUGH_MPI_INT4, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, + PUGH_MPI_INT4, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, + 
PUGH_MPI_INT4, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, + PUGH_MPI_INT4, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT4 *) output_values[i], 1, + PUGH_MPI_INT4, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif #ifdef CCTK_INT8 @@ -317,15 +422,31 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_INT8 *) output_values[i], 1 * sizeof (CCTK_INT8)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, - PUGH_MPI_INT8, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, - PUGH_MPI_INT8, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, - PUGH_MPI_INT8, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, + PUGH_MPI_INT8, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, + PUGH_MPI_INT8, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, + PUGH_MPI_INT8, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, + PUGH_MPI_INT8, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT8 *) 
output_values[i], 1, + PUGH_MPI_INT8, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_INT8 *) output_values[i], 1, + PUGH_MPI_INT8, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } + break; #endif case CCTK_VARIABLE_REAL: @@ -334,15 +455,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_REAL *) output_values[i], 1 * sizeof (CCTK_REAL)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1, - PUGH_MPI_REAL, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1, - PUGH_MPI_REAL, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1, - PUGH_MPI_REAL, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1, + PUGH_MPI_REAL, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1, + PUGH_MPI_REAL, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL *) output_values[i], 1, + PUGH_MPI_REAL, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL *) output_values[i], 1, + PUGH_MPI_REAL, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL *) output_values[i], 1, + PUGH_MPI_REAL, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce 
(local_outvals, (CCTK_REAL *) output_values[i], 1, + PUGH_MPI_REAL, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #ifdef CCTK_REAL4 case CCTK_VARIABLE_REAL4: @@ -351,15 +487,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_REAL4 *) output_values[i], 1 * sizeof (CCTK_REAL4)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, - PUGH_MPI_REAL4, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, - PUGH_MPI_REAL4, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, - PUGH_MPI_REAL4, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, + PUGH_MPI_REAL4, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, + PUGH_MPI_REAL4, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, + PUGH_MPI_REAL4, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, + PUGH_MPI_REAL4, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, + PUGH_MPI_REAL4, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL4 *) output_values[i], 1, + PUGH_MPI_REAL4, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } 
break; #endif #ifdef CCTK_REAL8 @@ -369,15 +520,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_REAL8 *) output_values[i], 1 * sizeof (CCTK_REAL8)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, - PUGH_MPI_REAL8, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, - PUGH_MPI_REAL8, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, - PUGH_MPI_REAL8, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, + PUGH_MPI_REAL8, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, + PUGH_MPI_REAL8, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, + PUGH_MPI_REAL8, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, + PUGH_MPI_REAL8, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, + PUGH_MPI_REAL8, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL8 *) output_values[i], 1, + PUGH_MPI_REAL8, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif #ifdef CCTK_REAL16 @@ -387,15 +553,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local 
reduction values */ memcpy ( local_outvals, (CCTK_REAL16 *) output_values[i], 1 * sizeof (CCTK_REAL16)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, - PUGH_MPI_REAL16, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, - PUGH_MPI_REAL16, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, - PUGH_MPI_REAL16, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, + PUGH_MPI_REAL16, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, + PUGH_MPI_REAL16, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, + PUGH_MPI_REAL16, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, + PUGH_MPI_REAL16, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, + PUGH_MPI_REAL16, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_REAL16 *) output_values[i], 1, + PUGH_MPI_REAL16, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif case CCTK_VARIABLE_COMPLEX: @@ -404,15 +585,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_COMPLEX *) output_values[i], 1 * sizeof (CCTK_COMPLEX)); - 
if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, - pughGH->PUGH_mpi_complex, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, - pughGH->PUGH_mpi_complex, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, - pughGH->PUGH_mpi_complex, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, + pughGH->PUGH_mpi_complex, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, + pughGH->PUGH_mpi_complex, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, + pughGH->PUGH_mpi_complex, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, + pughGH->PUGH_mpi_complex, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, + pughGH->PUGH_mpi_complex, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX *) output_values[i], 1, + pughGH->PUGH_mpi_complex, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #ifdef CCTK_COMPLEX8 case CCTK_VARIABLE_COMPLEX8: @@ -421,15 +617,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1 * sizeof (CCTK_COMPLEX8)); - 
if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, - pughGH->PUGH_mpi_complex8, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, - pughGH->PUGH_mpi_complex8, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, - pughGH->PUGH_mpi_complex8, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, + pughGH->PUGH_mpi_complex8, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, + pughGH->PUGH_mpi_complex8, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, + pughGH->PUGH_mpi_complex8, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, + pughGH->PUGH_mpi_complex8, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, + pughGH->PUGH_mpi_complex8, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX8 *) output_values[i], 1, + pughGH->PUGH_mpi_complex8, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif #ifdef CCTK_COMPLEX16 @@ -439,15 +650,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1 * sizeof (CCTK_COMPLEX16)); - 
if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, - pughGH->PUGH_mpi_complex16, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, - pughGH->PUGH_mpi_complex16, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, - pughGH->PUGH_mpi_complex16, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, + pughGH->PUGH_mpi_complex16, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, + pughGH->PUGH_mpi_complex16, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, + pughGH->PUGH_mpi_complex16, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, + pughGH->PUGH_mpi_complex16, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, + pughGH->PUGH_mpi_complex16, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX16 *) output_values[i], 1, + pughGH->PUGH_mpi_complex16, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif #ifdef CCTK_COMPLEX32 @@ -457,15 +683,30 @@ static int ReduceGridArrays (const cGH *GH, /* outvals[] contains now the local reduction values */ memcpy ( local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1 * sizeof 
(CCTK_COMPLEX32)); - if (global_operation == 1) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, - pughGH->PUGH_mpi_complex32, MPI_MAX, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 2) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, - pughGH->PUGH_mpi_complex32, MPI_MIN, pughGH->PUGH_COMM_WORLD)); - else if (global_operation == 3) - CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, - pughGH->PUGH_mpi_complex32, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + if (perform_all_reduce) + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, + pughGH->PUGH_mpi_complex32, MPI_MAX, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, + pughGH->PUGH_mpi_complex32, MPI_MIN, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Allreduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, + pughGH->PUGH_mpi_complex32, MPI_SUM, pughGH->PUGH_COMM_WORLD)); + } + else + { + if (global_operation == 1) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, + pughGH->PUGH_mpi_complex32, MPI_MAX, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 2) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, + pughGH->PUGH_mpi_complex32, MPI_MIN, dest_proc, pughGH->PUGH_COMM_WORLD)); + else if (global_operation == 3) + CACTUS_MPI_ERROR (MPI_Reduce (local_outvals, (CCTK_COMPLEX32 *) output_values[i], 1, + pughGH->PUGH_mpi_complex32, MPI_SUM, dest_proc, pughGH->PUGH_COMM_WORLD)); + } break; #endif } @@ -474,7 +715,6 @@ static int ReduceGridArrays (const cGH *GH, free (local_outvals); #endif - if (perform_division == 0) { for (i = 0; i< M_output_values; i++) -- cgit v1.2.3