about summary refs log tree commit diff
path: root/src/local_reductions.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/local_reductions.h')
-rw-r--r-- src/local_reductions.h 50
1 files changed, 23 insertions, 27 deletions
diff --git a/src/local_reductions.h b/src/local_reductions.h
index b223d45..57b2567 100644
--- a/src/local_reductions.h
+++ b/src/local_reductions.h
@@ -1,7 +1,7 @@
/*@@
@header local_reductions.h
@date
- @author Yaakoub El Khamra
+ @author Tom Goodale, Yaakoub Y El Khamra
@desc
Prototypes for local reduction operators
@enddesc
@@ -24,19 +24,23 @@ extern "C" {
#define ITERATE_ON_ARRAY(i,cctk_type, in_data, out_type, out_num, mask_on,input_array_offset, indices, sum_indices, max_iter, iter, flag, actual_indices,input_array_strides, input_array_min_subscripts,product) \
{ \
- iter = 0; \
- sum_indices = 0; \
+ iter = 0; \
+ sum_indices = 0; \
+ num_points = 1; \
const cctk_type *typed_vdata = (cctk_type *)(in_data); \
- out_type * outval = (out_type *)(out_num); \
- REDUCTION_INITIAL( *outval) \
+ out_type inval; \
+ out_type * outval = (out_type *) out_num; \
+ REDUCTION_INITIAL( * outval) \
if ( mask_on == 1) \
{ \
if ( input_array_offset == 0) \
{ \
while (iter < max_iter) \
{ \
- REDUCTION_OPERATION(* outval, typed_vdata[sum_indices]) \
+ inval = (out_type) typed_vdata[sum_indices]; \
+ REDUCTION_OPERATION(*outval,inval); \
num_points++; \
+ iter++; \
flag = 0; \
for (k=0;k<N_dims;k++) \
{ \
@@ -46,13 +50,11 @@ extern "C" {
{ \
actual_indices[k] += input_array_strides[k-1]; \
indices[k]++; \
- iter++; \
flag = 0; \
break; \
} \
indices[k]++; \
actual_indices[k] += input_array_strides[k]; \
- iter++; \
break; \
} \
else if (indices[k] == iters_per_dim[k]-1) \
@@ -82,17 +84,18 @@ extern "C" {
} \
else \
{ \
- iter = 0; \
- sum_indices = 0; \
while (iter < max_iter) \
{ \
- /* prevent offset from giving segfaults */ \
+ /* prevent offset from giving segfaults */ \
if (sum_indices >= max_iter) \
{ \
CCTK_WARN(1,"offsets and strides access unallocated memory"); \
return -1; \
} \
+ inval = (out_type) typed_vdata[sum_indices]; \
+ REDUCTION_OPERATION(*outval,inval); \
num_points++; \
+ iter++; \
flag = 0; \
for (k=0;k<N_dims;k++) \
{ \
@@ -102,13 +105,11 @@ extern "C" {
{ \
actual_indices[k] += input_array_strides[k-1]; \
indices[k]++; \
- iter++; \
flag = 0; \
break; \
} \
indices[k]++; \
actual_indices[k] += input_array_strides[k]; \
- iter++; \
break; \
} \
else if (indices[k] == iters_per_dim[k]-1) \
@@ -124,7 +125,7 @@ extern "C" {
return -1; \
} \
} \
- sum_indices = actual_indices[0]+input_array_offset; \
+ sum_indices = actual_indices[0]; \
for (k=N_dims-1;k>0;k--) \
{ \
product = 1; \
@@ -141,11 +142,12 @@ extern "C" {
{ \
if ( input_array_offset == 0) \
{ \
- iter = 0; \
- sum_indices = 0; \
while (iter < max_iter) \
{ \
+ inval = (out_type) typed_vdata[sum_indices]; \
+ REDUCTION_OPERATION(*outval,inval); \
num_points++; \
+ iter++; \
flag = 0; \
for (k=0;k<N_dims;k++) \
{ \
@@ -155,13 +157,11 @@ extern "C" {
{ \
actual_indices[k] += input_array_strides[k-1]; \
indices[k]++; \
- iter++; \
flag = 0; \
break; \
} \
indices[k]++; \
actual_indices[k] += input_array_strides[k]; \
- iter++; \
break; \
} \
else if (indices[k] == iters_per_dim[k]-1) \
@@ -191,8 +191,6 @@ extern "C" {
} \
else \
{ \
- iter = 0; \
- sum_indices = 0; \
while (iter < max_iter) \
{ \
/* prevent offset from giving segfaults */ \
@@ -201,7 +199,10 @@ extern "C" {
CCTK_WARN(1,"offsets and strides access unallocated memory"); \
return -1; \
} \
+ inval = (out_type) typed_vdata[sum_indices]; \
+ REDUCTION_OPERATION(*outval,inval); \
num_points++; \
+ iter++; \
flag = 0; \
for (k=0;k<N_dims;k++) \
{ \
@@ -211,13 +212,11 @@ extern "C" {
{ \
actual_indices[k] += input_array_strides[k-1]; \
indices[k]++; \
- iter++; \
flag = 0; \
break; \
} \
indices[k]++; \
actual_indices[k] += input_array_strides[k]; \
- iter++; \
break; \
} \
else if (indices[k] == iters_per_dim[k]-1) \
@@ -233,7 +232,7 @@ extern "C" {
return -1; \
} \
} \
- sum_indices = actual_indices[0]+input_array_offset; \
+ sum_indices = actual_indices[0]; \
for (k=N_dims-1;k>0;k--) \
{ \
product = 1; \
@@ -250,6 +249,7 @@ extern "C" {
{ \
CCTK_WARN(1, "mask_on is not set to a valid value"); \
} \
+ EXTRA_STEP(*outval, (out_type)num_points) \
}
@@ -263,10 +263,6 @@ int LocalReduce_L3 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_L4 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_LInf (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_Sum (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
-int LocalReduce_CmplxMax1 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
-int LocalReduce_CmplxMax2 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
-int LocalReduce_CmplxMin1 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
-int LocalReduce_CmplxMin2 (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
typedef int (*reduction_fn_t) (int N_dims, int operator_handle,
int param_table_handle, int N_input_arrays,