aboutsummaryrefslogtreecommitdiff
path: root/src/Topology.c
blob: 5492e6030140b0aa47644088dfa579ba0c10ec64 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
 /*@@
   @file      Topology.c
   @date      Wed Sep 13 20:10:24 2000
   @author    Tom Goodale
   @desc 
   Topology routines
   @enddesc
   @version  $Header$
 @@*/

#include <stdio.h>
#include <stdlib.h>

#include "cctk.h"

#include "pugh_Register.h"

/* Revision-control identification string; "$Header$" is an unexpanded
   keyword placeholder.
   NOTE(review): presumably consumed by the CCTK_FILEVERSION macro
   below -- confirm against cctk.h. */
static const char *rcsid = "$Header$";

CCTK_FILEVERSION(CactusPUGH_PUGH_Topology_c);

/********************************************************************
 *********************  Macro Definitions  **************************
 ********************************************************************/

/********************************************************************
 *********************  Local Data Types  ***************************
 ********************************************************************/

/********************************************************************
 *********************  Aliased Routine Prototypes  *****************
 ********************************************************************/

/********************************************************************
 *********************  Scheduled Routine Prototypes  ***************
 ********************************************************************/

/********************************************************************
 *********************  Fortran Wrapper Prototypes  *****************
 ********************************************************************/

/********************************************************************
 *********************  Local Routine Prototypes  *******************
 ********************************************************************/

static int FranksTopology(int dim,
                          int total_procs, 
                          const int *nsize,
                          const int *nghostzones, 
                          int *nprocs);

static int TraditionalTopology(int dim, 
                               int total_procs, 
                               const int *nsize,
                               const int *nghostzones, 
                               int *nprocs);

static int IntegerRoot(int number, int invpower);

static int IntSort (const void *a, const void *b);

/********************************************************************
 *********************  Local Data  *********************************
 ********************************************************************/

/********************************************************************
 *********************  Aliased Routines  ***************************
 ********************************************************************/

/********************************************************************
 *********************  Scheduled Routines  *************************
 ********************************************************************/
/* Fortran wrappers appear immediately after C definition of function */

/* Registers the topology generators of this file under their names:
 *   "manual"        -> TraditionalTopology
 *   "automatic"     -> FranksTopology
 *   "automatic_old" -> TraditionalTopology
 * The results of the registration calls are not inspected.
 * Always returns 0.
 */
int PUGH_RegisterPUGHTopologyRoutines(void);
int PUGH_RegisterPUGHTopologyRoutines(void)
{
  (void) PUGH_RegisterGenerateTopology(TraditionalTopology, "manual");
  (void) PUGH_RegisterGenerateTopology(FranksTopology, "automatic");
  (void) PUGH_RegisterGenerateTopology(TraditionalTopology, "automatic_old");

  return 0;
}

/********************************************************************
 *********************  Other External Routines  ********************
 ********************************************************************/

/********************************************************************
 *********************  Local Routines  *****************************
 ********************************************************************/

 /*@@
   @routine    TraditionalTopology
   @date       Fri Nov  5 11:31:21 1999
   @author     Tom Goodale
   @desc
   Generate the appropriate processor topology for this processor
   decomposition.
   Entries of nprocs which are already positive are kept as they are.
   The remaining ("free") dimensions are filled with factors of the
   processors left over, each chosen as the largest divisor not
   exceeding the appropriate integer root, and handed out in
   ascending order.
   @enddesc
   @history
   @hdate Tue Jan 30 17:04:50 2001 @hauthor Tom Goodale
   @hdesc Added call to integer root function and qsort to
          avoid problems with real to integer conversions and
          demonstrable failure of the algorithm when dealing
          with large prime factors.
   @endhistory

   @var     dim
   @vdesc   Number of dimensions, i.e. number of entries in nprocs
   @vtype   int
   @vio     in
   @endvar
   @var     total_procs
   @vdesc   Total number of processors to distribute
   @vtype   int
   @vio     in
   @endvar
   @var     nsize
   @vdesc   Unused by this routine
   @vtype   const int *
   @vio     in
   @endvar
   @var     nghostzones
   @vdesc   Unused by this routine
   @vtype   const int *
   @vio     in
   @endvar
   @var     nprocs
   @vdesc   Processors per dimension; entries > 0 are taken as given,
            entries <= 0 are filled in by this routine
   @vtype   int *
   @vio     inout
   @endvar

   @returntype int
   @returndesc
   0 for success, 1 if the given topology is inconsistent with
   total_procs or if memory could not be allocated.
   @endreturndesc
@@*/
static int TraditionalTopology(int dim, 
                               int total_procs, 
                               const int *nsize,
                               const int *nghostzones, 
                               int *nprocs)
{
  int i;
  int used_procs;
  int free_procs;
  int retval;
  int free_dims;

  /* The grid extents play no role in this algorithm. */
  (void) nsize;
  (void) nghostzones;

  used_procs = 0;
  free_procs = total_procs;

  retval = 0;

  free_dims = dim;

  /* Account for the dimensions whose processor count is already set. */
  for(i=0; i < dim; i++)
  {
    if(nprocs[i] > 0)
    {
      free_dims--;
      used_procs = used_procs ? used_procs * nprocs[i] : nprocs[i];

      /* the preset counts must divide the total number of processors */
      if (total_procs % used_procs)
      {
        CCTK_WARN(0, "Inconsistent PUGH topology");
        retval = 1;
      }
      else
      {
        free_procs = total_procs / used_procs;
      }
    }
  }

  /* if the user specified the number of processors for each dimension
     check that their product equals the total number of processors
     available */
  if (dim > 1 && retval == 0 && free_dims == 0 && used_procs != total_procs)
  {
    CCTK_WARN(0, "Inconsistent PUGH topology");
    /* report the failure to the caller as well, in case the warning
       above returns instead of terminating the run */
    retval = 1;
  }

  /* Ok calculate topology if necessary */
  if(free_dims > 0 && ! retval)
  {
    /* This algorithm gives the most number of processors
     * in the highest dimension.
     */

    int *working;
    int root;
    int place;

    root = free_dims;
    working = calloc(free_dims, sizeof *working);
    if (!working)
    {
      CCTK_WARN(0, "Out of memory, calloc returned NULL");
      /* only reached if the warning above returns */
      return 1;
    }
#ifdef DEBUG_PUGH
    printf("Processor topology for dimension %d\n",dim);
#endif

    for(i = 0; i < free_dims; i++)
    {
      /* start at the integer root and walk down to the next divisor
         of the processors which are still to be distributed */
      working[i] = IntegerRoot(free_procs, root);

      while(free_procs % working[i])
      {
        working[i]--;
      }

#ifdef DEBUG_PUGH
      printf(" working[%d] = %d\n",i,working[i]);
#endif
      free_procs /= working[i];
      root--;
    }

    /* The above doesn't necessarily sort them properly
     * e.g. if one of the factors is a prime then the
     * above will sort the 1 before the prime.
     */
    qsort(working,free_dims,sizeof(int),IntSort);

    /* hand the sorted factors to the dimensions which were not preset */
    for(i = 0,place=0; i < dim ; i++)
    {
      if(nprocs[i] <= 0)
      {
        nprocs[i] = working[place];
        place++;
      }

#ifdef DEBUG_PUGH
      printf(" nprocs[%d] = %d\n",i,nprocs[i]);
#endif
    }

    free(working);
  }

  return retval;
}



 /*@@
   @routine    FranksTopology
   @date       Thu Feb 02 17:39:21 2005
   @author     Frank Loeffler
   @desc
   Generate the appropriate processor topology for this processor
   decomposition.
   This routine tries to decompose using the information about the actual
   grid size: starting from a single domain it repeatedly adds one more
   cut in the direction whose pieces are currently the longest.
   It falls back to the traditional decomposition, giving a warning, if
   no direction can be cut any further, if a grid size is smaller than
   one, or if an entry of nprocs is already set to something different
   from what this routine arrives at.
   @enddesc
   @history
   @endhistory

   @var     dim
   @vdesc   Number of dimensions, i.e. number of entries in the arrays
   @vtype   int
   @vio     in
   @endvar
   @var     total_procs
   @vdesc   Total number of processors to distribute
   @vtype   int
   @vio     in
   @endvar
   @var     nsize
   @vdesc   Grid size in each dimension
   @vtype   const int *
   @vio     in
   @endvar
   @var     nghostzones
   @vdesc   Number of ghostzones in each dimension
   @vtype   const int *
   @vio     in
   @endvar
   @var     nprocs
   @vdesc   Processors per dimension; overwritten with the result
   @vtype   int *
   @vio     inout
   @endvar

   @returntype int
   @returndesc
   0 for success, 1 if memory could not be allocated, otherwise the
   return code of TraditionalTopology when falling back to it.
   @endreturndesc
@@*/
static int FranksTopology(int dim,
                          int total_procs, 
                          const int *nsize,
                          const int *nghostzones, 
                          int *nprocs)
{
  int i;
  int max_dir, max_length;
  int free_procs = total_procs-1;
  int used_procs;
  int *my_nprocs;
  
  /* Nothing to decompose here */
  if (dim == 0)
  {
    return TraditionalTopology(dim, total_procs, nsize, 
                               nghostzones, nprocs);
  }
  my_nprocs = malloc(dim * sizeof *my_nprocs);
  if (!my_nprocs)
  {
    CCTK_WARN(0, "Out of memory, malloc returned NULL");
    /* only reached if the warning above returns; never touch the
       NULL pointer */
    return 1;
  }

  /* start with a single domain */
  for (i = 0; i < dim; i++)
  {
    my_nprocs[i] = 1;
  }
  /* divide as long as there are processors left */
  while (free_procs)
  {
    used_procs = total_procs - free_procs;
    /* find the direction with the longest length, which is allowed */
    max_dir = -1;
    max_length = 0;
    for (i = 0; i < dim; i++)
    {
      /* is one part larger than the max? (then it might be a new max) */
      if ((nsize[i] / my_nprocs[i] > max_length) &&
          /* would there be at least one real point if we divide? */
          (nsize[i] > 2*nghostzones[i]+my_nprocs[i]) &&
          /* do we have enough processors left to divide in this direction? */
          (used_procs/my_nprocs[i] <= free_procs))
      {
        max_length = nsize[i] / my_nprocs[i];
        max_dir = i;
      }
    }
    /* if no such direction is found: fall back giving a warning */
    if (max_dir == -1)
    {
      free(my_nprocs);
      CCTK_WARN(CCTK_WARN_COMPLAIN,
                "Falling back to the old PUGH topology method");
      return TraditionalTopology(dim, total_procs, nsize, 
                                 nghostzones, nprocs);
    }
    /* count the new direction and go on */
    /* note: this is guaranteed to decrement at least by one, since the
       number of used processors is naturally always >= the number of
       processors used for one dimension */
    free_procs -= used_procs/my_nprocs[max_dir];
    my_nprocs[max_dir]++;
  }
  /* If there are numbers already set up which are different from what
     we arrive at or we have funny grid sizes, fall back */
  for (i = 0; i < dim; i++)
  {
    if ((nprocs[i] && (nprocs[i]!=my_nprocs[i])) || (nsize[i]<1))
    {
      free(my_nprocs);
      CCTK_WARN(CCTK_WARN_COMPLAIN,
                "Falling back to the old PUGH topology method");
      return TraditionalTopology(dim, total_procs, nsize, 
                                 nghostzones, nprocs);
    }
  }
  /* hand the result back to the caller */
  for (i = 0; i < dim; i++)
  {
    nprocs[i] = my_nprocs[i];
  }
  free(my_nprocs);
  /* success */
  return 0;
}

 /*@@
   @routine    IntegerRoot
   @date       Tue Jan 30 17:06:21 2001
   @author     Tom Goodale
   @desc
   Generate the highest integer not exceeding a given integer root
   of an integer, i.e. the largest r with r^invpower <= number.
   The computation uses integer divisions only, so it cannot overflow.
   @enddesc

   @var     number
   @vdesc   The number to take the root of
   @vtype   int
   @vio     in
   @endvar
   @var     invpower
   @vdesc   The root to take
   @vtype   int
   @vio     in
   @endvar

   @returntype int
   @returndesc
   The highest integer not exceeding the desired root.<BR>
   1 if number is smaller than 1, so that the result can always be
   used as a divisor.<BR>
   number itself if invpower is not larger than 1.
   @endreturndesc
@@*/
static int IntegerRoot(int number, int invpower)
{
  int root;

  /* never hand out a value below 1 */
  if(number < 1)
  {
    return 1;
  }

  /* the first root of a number is the number itself */
  if(invpower <= 1)
  {
    return number;
  }

  root = 1;
  for(;;)
  {
    int i;
    int candidate = root + 1;
    int remaining = number;

    /* Dividing number invpower times by candidate leaves zero
     * exactly if candidate^invpower > number; this avoids building
     * the power itself, which might not fit into an int.
     */
    for(i = 0; i < invpower && remaining > 0; i++)
    {
      remaining /= candidate;
    }

    if(remaining == 0)
    {
      break;
    }

    root = candidate;
  }

  return root;
}

 /*@@
   @routine    IntSort
   @date       Tue Jan 30 17:08:47 2001
   @author     Tom Goodale
   @desc
               Compares two integers for the qsort routine
               (ascending order).
   @enddesc

   @var        a
   @vdesc      Pointer to first integer to compare
   @vtype      const void *
   @vio        in
   @endvar
   @var        b
   @vdesc      Pointer to second integer to compare
   @vtype      const void *
   @vio        in
   @endvar

   @returntype int
   @returndesc
               -ve if b is greater than a.<BR>
               +ve if a is greater than b.<BR>
               0   if a is equal to b.
   @endreturndesc
@@*/
static int IntSort (const void *a, const void *b)
{
  const int first  = *(const int *) a;
  const int second = *(const int *) b;

  /* Compare instead of subtracting: the difference of two ints
   * may not be representable as an int.
   */
  return (first > second) - (first < second);
}