aboutsummaryrefslogtreecommitdiff
path: root/src/pugh_reductions.h
blob: bed64a0b6baf45c4dff69da9c292e77617a02b26 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
 /*@@
   @header    pugh_reductions.h
   @date      April 29 1999
   @author    Gabrielle Allen
   @desc
              Prototypes for pugh reduction operators
   @enddesc
   @version   $Header$
 @@*/

#ifndef _PUGH_REDUCTIONS_H_
#define _PUGH_REDUCTIONS_H_ 1


#include "cctk.h"
#include "cctk_Reduction.h"
#include "CactusPUGH/PUGH/src/include/pugh.h"

/***
   Macro to iterate over every element of an arbitrary sized array

   The array is given by
     - its CCTK type, cctk_type,
     - the number of dimensions, vdim,
     - an (untyped) pointer to its data, vdata,
     - one-dimensional vectors, from[] and to[], of length vdim,
       specifying the start- and endpoint to iterate over for each dimension

   The result of the reduction is specified by
     - the array where to store the results, outvals[]
     - the type of the intermediate result scalar, outvals_type
     - the number of scalars to reduce from this input array, num_outvals
     - the total number of scalars for all input arrays, total_outvals
       (incremented after each iteration)

   An iteration vector, iterator[], is passed in to hold the current indices
   for each dimension of the array.
   Another vector of size vdim, points_per_dim[], is used to compute the
   linear index out of the iterator[] elements.

   Before the iteration loop, a local reduction variable, typed_outval,
   is defined and initialized by the macro INITIAL_REDUCTION_VALUE(typed_vdata).

   For each iteration the macro REDUCTION_OPERATION(typed_outval, typed_inval)
   is called, with the local reduction variable and the pointer to the
   current data element.

   Finally, the local reduction variable, typed_outval, is type-casted
   to CCTK_REAL and assigned to the current output array value outvals[].
   The total output scalar counter, total_outvals, is incremented.


   NOTE: The macro assumes from[i] > to[i] for all 1 <= i < vdim such that
         at least one iteration will be performed.
         Make sure this is true when you set up the from[] and to[] vectors.

         For efficiency reasons, the innermost loop (the one for dimension 0)
         is coded as a for() loop. Thus the overhead to iterate over a
         one-dimensional array should be kept to a minimum.
***/
#define ITERATE_ARRAY(cctk_type, vdim, vdata, from, to, iterator,             \
                      points_per_dim,                                         \
                      outvals_type, outvals, num_outvals, total_outvals)      \
    {                                                                         \
      int _i, _j, _dim, _vindex;                                              \
      const cctk_type *typed_vdata = (vdata);                                 \
      outvals_type typed_outval;                                              \
                                                                              \
                                                                              \
      if (have_local_points)                                                  \
      {                                                                       \
        for (_j = 0; _j < num_outvals; _j++, typed_vdata++)                   \
        {                                                                     \
          /* get the linear index of the element to start with */             \
          _vindex = from[0];                                                  \
          for (_i = 1; _i < vdim; _i++)                                       \
            _vindex += from [_i] * points_per_dim [_i];                       \
          typed_outval = INITIAL_REDUCTION_VALUE(typed_vdata + _vindex);      \
                                                                              \
          /* set iterator to local startpoint */                              \
          memcpy (iterator, from, vdim * sizeof (int));                       \
                                                                              \
          /* do the nested loops starting with the second-innermost */        \
          _dim = 1;                                                           \
          while (1)                                                           \
          {                                                                   \
            /* get the linear index */                                        \
            _vindex = 0;                                                      \
            for (_i = 1; _i < vdim; _i++)                                     \
              _vindex += iterator [_i] * points_per_dim [_i];                 \
                                                                              \
            /* do the reduction for the innermost loop (lowest dimension) */  \
            for (_i = from [0]; _i < to [0]; _i++)                            \
            {                                                                 \
              REDUCTION_OPERATION (typed_outval, typed_vdata [_i + _vindex]); \
            }                                                                 \
                                                                              \
            if (vdim > 1)                                                     \
            {                                                                 \
              /* increment current looper and check for end */                \
              if (++iterator [_dim] >= to [_dim])                             \
              {                                                               \
                /* increment outermost loopers */                             \
                for (_dim++; _dim < vdim; _dim++)                             \
                {                                                             \
                  if (++iterator [_dim] < to [_dim])                          \
                    break;                                                    \
                }                                                             \
                                                                              \
                /* done if beyond outermost loop */                           \
                if (_dim >= vdim)                                             \
                  break;                                                      \
                                                                              \
                /* reset innermost loopers */                                 \
                for (_dim--; _dim >= 0; _dim--)                               \
                  iterator [_dim] = from [_dim];                              \
                _dim = 1;                                                     \
              }                                                               \
            }                                                                 \
            else                                                              \
            {                                                                 \
              /* exit loop if array is one-dimensional */                     \
              break;                                                          \
            }                                                                 \
                                                                              \
          } /* end of nested loops over all dimensions */                     \
                                                                              \
          outvals [total_outvals++] = (CCTK_REAL) typed_outval;               \
                                                                              \
        } /* end of loop over num_outvals */                                  \
      }                                                                       \
      else                                                                    \
      {                                                                       \
        total_outvals += num_outvals;                                         \
      }                                                                       \
    }


#ifdef __cplusplus
extern "C" {
#endif
int PUGH_ReduceGridArrays (REDUCTION_GRID_ARRAY_OPERATOR_REGISTER_ARGLIST);

int PUGH_ReduceArraysGlobally(REDUCTION_ARRAYS_GLOBALLY_OPERATOR_REGISTER_ARGLIST);

int PUGH_ReductionAvgGVs     (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionCountGVs   (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionMaxValGVs  (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionMinValGVs  (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm1GVs   (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm2GVs   (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm3GVs   (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm4GVs   (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNormInfGVs (REDUCTION_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionSumGVs     (REDUCTION_OPERATOR_REGISTER_ARGLIST);

int PUGH_ReductionAvgArrays     (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionCountArrays   (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionMaxValArrays  (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionMinValArrays  (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm1Arrays   (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm2Arrays   (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm3Arrays   (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNorm4Arrays   (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionNormInfArrays (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);
int PUGH_ReductionSumArrays     (REDUCTION_ARRAY_OPERATOR_REGISTER_ARGLIST);

typedef int (*reduction_fn_t) (const cGH *GH,
                               int proc,
                               int num_dims,
                               const int from[/* dim */],
                               const int to[/* dim */],
                               int iterator[/* dim */],
                               const int points_per_dim[/* dim */],
                               int num_points,
                               int have_local_points,
                               int num_inarrays,
                               const int intypes[/* num_inarrays */],
                               const void *const inarrays[/* num_inarrays */],
                               int num_outvals,
                               CCTK_REAL outvals[/*num_inarrays*num_outvals*/]);

int PUGH_ReductionGVs (const cGH *GH,
                       int proc,
                       int num_invars,
                       const int invars[/* num_invars */],
                       int outtype,
                       int num_outvals,
                       void *outvals /* [num_outvals] */,
                       reduction_fn_t reduction_fn);
int PUGH_ReductionArrays (const cGH *GH,
                          int proc,
                          int num_dims,
                          const int dims[/* num_dims */],
                          int intype,
                          int num_inarrays,
                          const void *const inarrays[/* num_inarrays */],
                          int outtype,
                          int num_outvals,
                          void *outvals /* [num_outvals] */,
                          reduction_fn_t reduction_fn);

#ifdef __cplusplus
}
#endif

#endif