diff options
Diffstat (limited to 'src/local_reductions.h')
-rw-r--r-- | src/local_reductions.h | 50 |
1 files changed, 23 insertions, 27 deletions
diff --git a/src/local_reductions.h b/src/local_reductions.h index b223d45..57b2567 100644 --- a/src/local_reductions.h +++ b/src/local_reductions.h @@ -1,7 +1,7 @@ /*@@ @header local_reductions.h @date - @author Yaakoub El Khamra + @author Tom Goodale, Yaakoub Y El Khamra @desc Prototypes for local reduction operators @enddesc @@ -24,19 +24,23 @@ extern "C" { #define ITERATE_ON_ARRAY(i,cctk_type, in_data, out_type, out_num, mask_on,input_array_offset, indices, sum_indices, max_iter, iter, flag, actual_indices,input_array_strides, input_array_min_subscripts,product) \ { \ - iter = 0; \ - sum_indices = 0; \ + iter = 0; \ + sum_indices = 0; \ + num_points = 1; \ const cctk_type *typed_vdata = (cctk_type *)(in_data); \ - out_type * outval = (out_type *)(out_num); \ - REDUCTION_INITIAL( *outval) \ + out_type inval; \ + out_type * outval = (out_type *) out_num; \ + REDUCTION_INITIAL( * outval) \ if ( mask_on == 1) \ { \ if ( input_array_offset == 0) \ { \ while (iter < max_iter) \ { \ - REDUCTION_OPERATION(* outval, typed_vdata[sum_indices]) \ + inval = (out_type) typed_vdata[sum_indices]; \ + REDUCTION_OPERATION(*outval,inval); \ num_points++; \ + iter++; \ flag = 0; \ for (k=0;k<N_dims;k++) \ { \ @@ -46,13 +50,11 @@ extern "C" { { \ actual_indices[k] += input_array_strides[k-1]; \ indices[k]++; \ - iter++; \ flag = 0; \ break; \ } \ indices[k]++; \ actual_indices[k] += input_array_strides[k]; \ - iter++; \ break; \ } \ else if (indices[k] == iters_per_dim[k]-1) \ @@ -82,17 +84,18 @@ extern "C" { } \ else \ { \ - iter = 0; \ - sum_indices = 0; \ while (iter < max_iter) \ { \ - /* prevent offset from giving segfaults */ \ + /* prevent offset from giving segfaults */ \ if (sum_indices >= max_iter) \ { \ CCTK_WARN(1,"offsets and strides access unallocated memory"); \ return -1; \ } \ + inval = (out_type) typed_vdata[sum_indices]; \ + REDUCTION_OPERATION(*outval,inval); \ num_points++; \ + iter++; \ flag = 0; \ for (k=0;k<N_dims;k++) \ { \ @@ -102,13 +105,11 @@ extern "C" { { \ actual_indices[k] += input_array_strides[k-1]; \ indices[k]++; \ - iter++; \ flag = 0; \ break; \ } \ indices[k]++; \ actual_indices[k] += input_array_strides[k]; \ - iter++; \ break; \ } \ else if (indices[k] == iters_per_dim[k]-1) \ @@ -124,7 +125,7 @@ extern "C" { return -1; \ } \ } \ - sum_indices = actual_indices[0]+input_array_offset; \ + sum_indices = actual_indices[0]; \ for (k=N_dims-1;k>0;k--) \ { \ product = 1; \ @@ -141,11 +142,12 @@ extern "C" { { \ if ( input_array_offset == 0) \ { \ - iter = 0; \ - sum_indices = 0; \ while (iter < max_iter) \ { \ + inval = (out_type) typed_vdata[sum_indices]; \ + REDUCTION_OPERATION(*outval,inval); \ num_points++; \ + iter++; \ flag = 0; \ for (k=0;k<N_dims;k++) \ { \ @@ -155,13 +157,11 @@ extern "C" { { \ actual_indices[k] += input_array_strides[k-1]; \ indices[k]++; \ - iter++; \ flag = 0; \ break; \ } \ indices[k]++; \ actual_indices[k] += input_array_strides[k]; \ - iter++; \ break; \ } \ else if (indices[k] == iters_per_dim[k]-1) \ @@ -191,8 +191,6 @@ extern "C" { } \ else \ { \ - iter = 0; \ - sum_indices = 0; \ while (iter < max_iter) \ { \ /* prevent offset from giving segfaults */ \ @@ -201,7 +199,10 @@ extern "C" { CCTK_WARN(1,"offsets and strides access unallocated memory"); \ return -1; \ } \ + inval = (out_type) typed_vdata[sum_indices]; \ + REDUCTION_OPERATION(*outval,inval); \ num_points++; \ + iter++; \ flag = 0; \ for (k=0;k<N_dims;k++) \ { \ @@ -211,13 +212,11 @@ extern "C" { { \ actual_indices[k] += input_array_strides[k-1]; \ indices[k]++; \ - iter++; \ flag = 0; \ break; \ } \ indices[k]++; \ actual_indices[k] += input_array_strides[k]; \ - iter++; \ break; \ } \ else if (indices[k] == iters_per_dim[k]-1) \ @@ -233,7 +232,7 @@ extern "C" { return -1; \ } \ } \ - sum_indices = actual_indices[0]+input_array_offset; \ + sum_indices = actual_indices[0]; \ for (k=N_dims-1;k>0;k--) \ { \ product = 1; \ @@ -250,6 +249,7 @@ extern "C" { { \ CCTK_WARN(1, "mask_on is not set to a valid value"); \ } \ + EXTRA_STEP(*outval, (out_type)num_points) \ } @@ -263,10 +263,6 @@ int LocalReduce_L3 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); int LocalReduce_L4 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); int LocalReduce_LInf (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); int LocalReduce_Sum (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); -int LocalReduce_CmplxMax1 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); -int LocalReduce_CmplxMax2 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); -int LocalReduce_CmplxMin1 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); -int LocalReduce_CmplxMin2 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST); typedef int (*reduction_fn_t) (int N_dims, int operator_handle, int param_table_handle, int N_input_arrays, |