aboutsummaryrefslogtreecommitdiff
path: root/src/local_reductions.h
blob: 631606327af65184324ce3b56bdb5c84c85da49c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
 /*@@
   @header    local_reductions.h
   @date      
   @author    Tom Goodale, Yaakoub Y El Khamra
   @desc
              Prototypes for local reduction operators
   @enddesc
   @version   $Header$
 @@*/

#ifndef _LOCAL_REDUCTIONS_H_
#define _LOCAL_REDUCTIONS_H_


#include "cctk.h"
#include "util_Table.h"
#include "cctk_Reduction.h"

#define ABS(x)      ((x) < 0 ? -(x) : (x))

#ifdef __cplusplus
extern "C" {
#endif


#define ITERATE_ON_ARRAY(i,cctk_type, in_data, out_type, out_num, weight_on, weight, input_array_offset, indices, sum_indices, max_iter, iter, flag, actual_indices,input_array_strides, input_array_min_subscripts,input_array_dims,product)           \
{                                                                                 \
  const cctk_type * typed_vdata = (const cctk_type *)(in_data);                   \
  out_type inval;                                                                 \
  out_type * outval = (out_type *) out_num;                                       \
  iter = 0;                                                                       \
  sum_indices = 0;                                                                \
  num_points = 1;                                                                 \
  REDUCTION_INITIAL( * outval)                                                    \
                                                                                  \
  if ( N_dims == 0 )                                                              \
  {                                                                               \
    REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type);               \
    REDUCTION_OPERATION(*outval,inval);                                           \
  }                                                                               \
  else                                                                            \
  {                                                                               \
  if ( weight_on == 1)                                                            \
  {                                                                               \
    if ( input_array_offset == 0)                                                 \
    {                                                                             \
      while (iter < max_iter)                                                     \
      {                                                                           \
        sum_indices = actual_indices[0];                                          \
        for (k=N_dims-1;k>0;k--)                                                  \
        {                                                                         \
          product = 1;                                                            \
          for (j=k-1;j>=0;j--)                                                    \
          {                                                                       \
            product *= input_array_dims[j];                                       \
          }                                                                       \
          sum_indices += actual_indices[k]*product;                               \
        }                                                                         \
        weight_value = ((CCTK_REAL *) weight)[sum_indices];                       \
                                                                                  \
        REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type);           \
        WEIGHTED_REDUCTION_OPERATION(*outval,inval,weight_value);                 \
        num_points++;                                                             \
        weight_sum += weight_value;                                               \
        iter++;                                                                   \
        flag = 0;                                                                 \
        for (k=0;k<N_dims;k++)                                                    \
        {                                                                         \
          if( indices[k] < iters_per_dim[k]-1)                                    \
          {                                                                       \
            if (flag==1)                                                          \
            {                                                                     \
              actual_indices[k] += input_array_strides[k];                        \
              indices[k]++;                                                       \
              flag = 0;                                                           \
              break;                                                              \
            }                                                                     \
            indices[k]++;                                                         \
            actual_indices[k] += input_array_strides[k];                          \
            break;                                                                \
          }                                                                       \
          else if (indices[k] == iters_per_dim[k]-1)                              \
          {                                                                       \
            indices[k] = 0;                                                       \
            actual_indices[k] = input_array_min_subscripts[k];                    \
            flag = 1;                                                             \
            continue;                                                             \
          }                                                                       \
          else                                                                    \
          {                                                                       \
            CCTK_WARN(1,"indices out of bounds, this should not happen");         \
            return -1;                                                            \
          }                                                                       \
        }                                                                         \
      }                                                                           \
    }                                                                             \
    else                                                                          \
    {                                                                             \
      while (iter < max_iter)                                                     \
      {                                                                           \
        sum_indices = actual_indices[0];                                          \
        for (k=N_dims-1;k>0;k--)                                                  \
        {                                                                         \
          product = 1;                                                            \
          for (j=k-1;j>=0;j--)                                                    \
          {                                                                       \
            product *= actual_iters_per_dim[j];                                   \
          }                                                                       \
          sum_indices += actual_indices[k]*product;                               \
        }                                                                         \
        /* prevent offset from giving segfaults */                                \
        if (sum_indices >= max_iter)                                              \
        {                                                                         \
          CCTK_WARN(1,"offsets and strides access unallocated memory");           \
          return -1;                                                              \
        }                                                                         \
        REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type);           \
        WEIGHTED_REDUCTION_OPERATION(*outval,inval,weight_value);                 \
        num_points++;                                                             \
        iter++;                                                                   \
        flag = 0;                                                                 \
        for (k=0;k<N_dims;k++)                                                    \
        {                                                                         \
          if( indices[k] < iters_per_dim[k]-1)                                    \
          {                                                                       \
            if (flag==1)                                                          \
            {                                                                     \
              actual_indices[k] += input_array_strides[k-1];                      \
              indices[k]++;                                                       \
              flag = 0;                                                           \
              break;                                                              \
            }                                                                     \
            indices[k]++;                                                         \
            actual_indices[k] += input_array_strides[k];                          \
            break;                                                                \
          }                                                                       \
          else if (indices[k] == iters_per_dim[k]-1)                              \
          {                                                                       \
            indices[k] = 0;                                                       \
            actual_indices[k] = input_array_min_subscripts[k];                    \
            flag = 1;                                                             \
            continue;                                                             \
          }                                                                       \
          else                                                                    \
          {                                                                       \
            CCTK_WARN(1,"indices out of bounds, this should not happen");         \
            return -1;                                                            \
          }                                                                       \
        }                                                                         \
      }                                                                           \
    }                                                                             \
  }                                                                               \
  else if (weight_on ==0)                                                         \
  {                                                                               \
    if ( input_array_offset == 0)                                                 \
    {                                                                             \
      while (iter < max_iter)                                                     \
      {                                                                           \
        sum_indices = actual_indices[0];                                          \
        for (k=N_dims-1;k>0;k--)                                                  \
        {                                                                         \
          product = 1;                                                            \
          for (j=k-1;j>=0;j--)                                                    \
          {                                                                       \
            product *= input_array_dims[j];                                       \
          }                                                                       \
          sum_indices += actual_indices[k]*product;                               \
        }                                                                         \
        REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type);           \
        REDUCTION_OPERATION(*outval,inval);                                       \
        num_points++;                                                             \
        weight_sum += weight_value;                                               \
        iter++;                                                                   \
        flag = 0;                                                                 \
        for (k=0;k<N_dims;k++)                                                    \
        {                                                                         \
          if( indices[k] < iters_per_dim[k]-1)                                    \
          {                                                                       \
            if (flag==1)                                                          \
            {                                                                     \
              actual_indices[k] += input_array_strides[k];                        \
              indices[k]++;                                                       \
              flag = 0;                                                           \
              break;                                                              \
            }                                                                     \
            indices[k]++;                                                         \
            actual_indices[k] += input_array_strides[k];                          \
            break;                                                                \
          }                                                                       \
          else if (indices[k] == iters_per_dim[k]-1)                              \
          {                                                                       \
            indices[k] = 0;                                                       \
            actual_indices[k] = input_array_min_subscripts[k];                    \
            flag = 1;                                                             \
            continue;                                                             \
          }                                                                       \
          else                                                                    \
          {                                                                       \
            CCTK_WARN(1,"indices out of bounds, this should not happen");         \
            return -1;                                                            \
          }                                                                       \
        }                                                                         \
      }                                                                           \
    }                                                                             \
    else                                                                          \
    {                                                                             \
      while (iter < max_iter)                                                     \
      {                                                                           \
        sum_indices = actual_indices[0];                                          \
        for (k=N_dims-1;k>0;k--)                                                  \
        {                                                                         \
          product = 1;                                                            \
          for (j=k-1;j>=0;j--)                                                    \
          {                                                                       \
            product *= actual_iters_per_dim[j];                                   \
          }                                                                       \
          sum_indices += actual_indices[k]*product;                               \
        }                                                                         \
        /* prevent offset from giving segfaults */                                \
        if (sum_indices >= max_iter)                                              \
        {                                                                         \
          CCTK_WARN(1,"offsets and strides access unallocated memory");           \
          return -1;                                                              \
        }                                                                         \
        REDUCTION_PREOP_CAST(inval, typed_vdata,sum_indices, out_type);           \
        REDUCTION_OPERATION(*outval,inval);                                       \
        num_points++;                                                             \
        iter++;                                                                   \
        flag = 0;                                                                 \
        for (k=0;k<N_dims;k++)                                                    \
        {                                                                         \
          if( indices[k] < iters_per_dim[k]-1)                                    \
          {                                                                       \
            if (flag==1)                                                          \
            {                                                                     \
              actual_indices[k] += input_array_strides[k-1];                      \
              indices[k]++;                                                       \
              flag = 0;                                                           \
              break;                                                              \
            }                                                                     \
            indices[k]++;                                                         \
            actual_indices[k] += input_array_strides[k];                          \
            break;                                                                \
          }                                                                       \
          else if (indices[k] == iters_per_dim[k]-1)                              \
          {                                                                       \
            indices[k] = 0;                                                       \
            actual_indices[k] = input_array_min_subscripts[k];                    \
            flag = 1;                                                             \
            continue;                                                             \
          }                                                                       \
          else                                                                    \
          {                                                                       \
            CCTK_WARN(1,"indices out of bounds, this should not happen");         \
            return -1;                                                            \
          }                                                                       \
        }                                                                         \
      }                                                                           \
    }                                                                             \
  }                                                                               \
  else                                                                            \
  {                                                                               \
    CCTK_WARN(1, "weight_on is not set to a valid value");                        \
  }                                                                               \
  }                                                                               \
  EXTRA_STEP(*outval, (out_type)num_points)                                       \
}


int LocalReduce_Mean     (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_Max  (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_Min  (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_Count   (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_Sum     (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);

int LocalReduce_L1   (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_L2   (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_L3   (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_L4   (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);
int LocalReduce_LInf (REDUCTION_LOCAL_ARRAY_OPERATOR_REGISTER_ARGLIST);


typedef int (*reduction_fn_t) (int N_dims, int operator_handle, 
                        int param_table_handle,   int N_input_arrays,
                        const CCTK_INT input_array_dims[], 
                        const CCTK_INT input_array_type_codes[],
                        const void *const input_arrays[],
                        int M_output_numbers,
                        const CCTK_INT output_number_type_codes[],
                        void * const output_numbers[]);

int LocalReduce_Reduce (int N_dims, int operator_handle, 
                          int param_table_handle,   int N_input_arrays,
                          const CCTK_INT input_array_dims[], 
                          const CCTK_INT input_array_type_codes[],
                          const void *const input_arrays[],
                          int M_output_numbers,
                          const CCTK_INT output_number_type_codes[],
                          void * const output_numbers[],reduction_fn_t reduction_fn);



#ifdef __cplusplus
}
#endif




#endif