From 0b121f6dd82443b1adc611507a49fae9232d1ddb Mon Sep 17 00:00:00 2001
From: yye00 <yye00@7daa882c-dc44-4453-834e-278d26b18e6a>
Date: Tue, 25 Jan 2005 02:06:38 +0000
Subject: Match the new (correct) complex reduction definitions: Norm4 accumulates (Re^2 + Im^2)^2

git-svn-id: http://svn.cactuscode.org/arrangements/CactusBase/LocalReduce/trunk@53 7daa882c-dc44-4453-834e-278d26b18e6a
---
 src/Norm4Functions.c | 256 +++++++++++++++++++++++++++++++++++----------------
 src/ReductionNorm2.c |   1 +
 src/ReductionNorm3.c |   1 +
 src/ReductionNorm4.c |   1 +
 4 files changed, 179 insertions(+), 80 deletions(-)

diff --git a/src/Norm4Functions.c b/src/Norm4Functions.c
index a191b10..469ecb2 100644
--- a/src/Norm4Functions.c
+++ b/src/Norm4Functions.c
@@ -2554,23 +2554,27 @@ int LocalReduce_Norm4_COMPLEX(int i, int mask_on, CCTK_INT * input_array_offsets
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -2625,9 +2629,16 @@ int LocalReduce_Norm4_COMPLEX(int i, int mask_on, CCTK_INT * input_array_offsets
 #undef REDUCTION_OPERATION
 #undef REDUCTION_INITIAL
 #undef EXTRA_STEP
+#ifdef POWER2
+#undef POWER2
+#endif
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x)*(x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #undef  REDUCTION_PREOP_CAST
@@ -2644,23 +2655,27 @@ int LocalReduce_Norm4_COMPLEX(int i, int mask_on, CCTK_INT * input_array_offsets
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -2689,23 +2704,27 @@ int LocalReduce_Norm4_COMPLEX(int i, int mask_on, CCTK_INT * input_array_offsets
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -2736,23 +2755,27 @@ int LocalReduce_Norm4_COMPLEX(int i, int mask_on, CCTK_INT * input_array_offsets
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -2766,6 +2789,9 @@ int LocalReduce_Norm4_COMPLEX(int i, int mask_on, CCTK_INT * input_array_offsets
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #undef  REDUCTION_PREOP_CAST
@@ -2783,23 +2809,27 @@ int LocalReduce_Norm4_COMPLEX(int i, int mask_on, CCTK_INT * input_array_offsets
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
     break;
@@ -2822,23 +2852,27 @@ int LocalReduce_Norm4_COMPLEX8(int i, int mask_on, CCTK_INT * input_array_offset
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -2912,23 +2946,27 @@ int LocalReduce_Norm4_COMPLEX8(int i, int mask_on, CCTK_INT * input_array_offset
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -2941,6 +2979,9 @@ int LocalReduce_Norm4_COMPLEX8(int i, int mask_on, CCTK_INT * input_array_offset
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #undef  REDUCTION_PREOP_CAST
@@ -2957,23 +2998,27 @@ int LocalReduce_Norm4_COMPLEX8(int i, int mask_on, CCTK_INT * input_array_offset
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -2987,6 +3032,9 @@ int LocalReduce_Norm4_COMPLEX8(int i, int mask_on, CCTK_INT * input_array_offset
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #undef  REDUCTION_PREOP_CAST
@@ -3004,23 +3052,27 @@ int LocalReduce_Norm4_COMPLEX8(int i, int mask_on, CCTK_INT * input_array_offset
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3051,23 +3103,24 @@ int LocalReduce_Norm4_COMPLEX8(int i, int mask_on, CCTK_INT * input_array_offset
 #ifdef POWER4
 #undef POWER4
 #endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
     break;
@@ -3090,23 +3143,27 @@ int LocalReduce_Norm4_COMPLEX16(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3180,23 +3237,27 @@ int LocalReduce_Norm4_COMPLEX16(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3225,23 +3286,27 @@ int LocalReduce_Norm4_COMPLEX16(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3255,6 +3320,9 @@ int LocalReduce_Norm4_COMPLEX16(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #undef  REDUCTION_PREOP_CAST
@@ -3272,23 +3340,27 @@ int LocalReduce_Norm4_COMPLEX16(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3319,23 +3391,27 @@ int LocalReduce_Norm4_COMPLEX16(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
     break;
@@ -3358,23 +3434,27 @@ int LocalReduce_Norm4_COMPLEX32(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3448,23 +3528,27 @@ int LocalReduce_Norm4_COMPLEX32(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3493,23 +3577,27 @@ int LocalReduce_Norm4_COMPLEX32(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3540,23 +3628,27 @@ int LocalReduce_Norm4_COMPLEX32(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
 
@@ -3587,23 +3679,27 @@ int LocalReduce_Norm4_COMPLEX32(int i, int mask_on, CCTK_INT * input_array_offse
 #ifdef POWER4
 #undef POWER4
 #endif
+#ifdef POWER2
+#undef POWER2
+#endif
+#define POWER2(x)      ((x) * (x))
 #define POWER4(x)      ((x) * (x) * (x) * (x))
 
 #ifdef  CCTK_REAL_PRECISION_4
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL4) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL4) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_8
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL8) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL8) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #elif   CCTK_REAL_PRECISION_16
 #undef  REDUCTION_PREOP_CAST
 #define REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type) \
-        inval = (CCTK_REAL16) ((typed_vdata[sum_indices]).Re);
+        inval = (CCTK_REAL16) POWER2((POWER2((typed_vdata[sum_indices]).Re)+POWER2((typed_vdata[sum_indices]).Im)));
 #endif
 
-#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + POWER4(scalar);
+#define REDUCTION_OPERATION(Norm4, scalar)   Norm4 = Norm4 + scalar;
 #define REDUCTION_INITIAL(num) num = 0;
 #define EXTRA_STEP(a, b)
     break;
diff --git a/src/ReductionNorm2.c b/src/ReductionNorm2.c
index c6c1835..6876db5 100644
--- a/src/ReductionNorm2.c
+++ b/src/ReductionNorm2.c
@@ -334,6 +334,7 @@ static int ReductionL2 (int N_dims, int operator_handle,
     ierr = Util_TableSetInt(param_table_handle, num_points, "num_points");
     ierr = Util_TableSetInt(param_table_handle, 3,"global_operation");
     ierr = Util_TableSetInt(param_table_handle, 0,"perform_division");
+    ierr = Util_TableSetInt(param_table_handle, 0,"perform_2_root");
   }
   else
   {
diff --git a/src/ReductionNorm3.c b/src/ReductionNorm3.c
index cc3bcd5..b201136 100644
--- a/src/ReductionNorm3.c
+++ b/src/ReductionNorm3.c
@@ -331,6 +331,7 @@ static int ReductionL3 (int N_dims, int operator_handle,
     ierr = Util_TableSetInt(param_table_handle, num_points, "num_points");
     ierr = Util_TableSetInt(param_table_handle, 3,"global_operation");
     ierr = Util_TableSetInt(param_table_handle, 0,"perform_division");
+    ierr = Util_TableSetInt(param_table_handle, 0,"perform_3_root");
   }
   else
   {
diff --git a/src/ReductionNorm4.c b/src/ReductionNorm4.c
index 9f895b9..37b6afa 100644
--- a/src/ReductionNorm4.c
+++ b/src/ReductionNorm4.c
@@ -331,6 +331,7 @@ static int ReductionL4 (int N_dims, int operator_handle,
     ierr = Util_TableSetInt(param_table_handle, num_points, "num_points");
     ierr = Util_TableSetInt(param_table_handle, 3,"global_operation");
     ierr = Util_TableSetInt(param_table_handle, 0,"perform_division");
+    ierr = Util_TableSetInt(param_table_handle, 0,"perform_4_root");    
   }
   else
   {
-- 
cgit v1.2.3