Carpet/Carpet/src/LoadBalanceReal/carpet_boxtypes.f90


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690

module carpet_boxtypes

  use carpet_region

  implicit none

  integer :: ghostsize
  real(wp) :: alpha
  logical :: limit_size
  integer :: granularity
  integer :: granularity_boundary
  integer :: procid

  contains

!   The basic calculation of the cost in each of the 3 directions.
    subroutine cost_box ( reg, cost )
      type(bbox), intent(in) :: reg
      integer, dimension(3), intent(out) :: cost
      integer :: i
    
      do i = 1, 3
        cost(i) = (reg%dim(i)%upper -  reg%dim(i)%lower)/reg%dim(i)%stride + 1
      end do
    end subroutine cost_box


!   Find a sorted list of the best options for splitting npoints in 2 chunks
!   among nprocs processors. The fractional remainder is passed in through 
!   fract. The maximum allowed number of options to return is nmax, and the
!   actual number of returned options is nact. The result is returned in
!   np (number of procs for the first chunk) and nc (a measure of the
!   imabalance of the split.
    subroutine choosenprocs ( npoints, nprocs, fract, nmax, nact, np, nc )
      integer, intent(in) :: npoints, nmax
      real(wp), intent(in) :: nprocs, fract
      integer, intent(out) :: nact
      integer, dimension(:), intent(out) :: np
      real(wp), dimension(:), intent(out) :: nc
      integer :: idealcost
      integer :: quot, i, j, round, ind
      real(wp) :: c1, c2, currentimbal
      logical :: exchange

!     The idealcost should be npoints/nprocs but I rescale with nprocs for
!     The purpose of avoiding some problems with floating point comparisons.
      idealcost = npoints

!     Initialize the cost to the maximum possible floating point number.
      nc = minval( empty )

!     initialize the actual returned number of options to 0.
      nact = 0

!     Initialize an integer index to zero. Used to keep track where the current
!     option should be inserted in the result vector.
      ind = 1

!     Calculate the starting number of processors to start the search with
!     (quot) as the largest integer smaller than nprocs.
      quot = floor(nprocs)
!     If the fractional part we have to leave unsplit is larger than 1
!     decrement quot by 1.
      if ( fract > 1.0_wp ) quot = quot-1
!     If nprocs has zero fractional part, we can start at quot/2 due to
!     symmetry.
      if ( .not. ( nprocs>quot ) ) then
        quot = quot / 2
      end if

!     Try all options until reaching a single processor.
      options: do i=quot, 1, -1

!       Calculate the number of points that will yield the smallest imbalance.
        round = nint(real(npoints,wp)/nprocs*i)

!       Calculate the imbalance for the current choice of processors.
!       This has been scaled by nprocs (like idealcost has been above).
        c1 = abs ( (round*nprocs)/i - idealcost )
        c2 = abs ( (npoints-round)/(nprocs-i)*nprocs - idealcost )
        currentimbal = max ( c1, c2 )

!       Initialize the boolean exchange that keeps track of whether the current
!       choice needs to replace an entry in the return vectors.
        exchange = .false.

!       Start from the current number of entries in the return vector and check
!       if the current choice is better than any of them. If so indicate that
!       an exchange has to happen and record the index.
        do j = nact, 1, -1
          if ( currentimbal < nc(j) ) then
            exchange = .true.
            ind = j
          end if
        end do

!       If an exchange has to happen, shift all elements with indices higher
!       than or equal to ind to the right.
        if (exchange) then
          np(ind:nmax) = eoshift(np(ind:nmax),-1)
          nc(ind:nmax) = eoshift(nc(ind:nmax),-1)
        end if

!       If we don't need to do an exchange, and if nact<nmax add the current
!       case after the previous entries.
        if (nact<nmax .and. .not.exchange) then
          nact = nact+1
          np(nact) = i
          nc(nact) = currentimbal
        end if

!       If the return vectors are empty or an exhange has to be done, insert
!       the current choice at the appropriate position (ind) and increment the
!       number of elements in the return vector (capped at nmax)
        if (nact == 0 .or. exchange) then
          nact = min(nact+1,nmax)
          np(ind) = i
          nc(ind) = currentimbal
        end if
      end do options

!     This is commented out for now. Used to return only the elements that are
!     as good as the best case.
!      nbad = nact + 1
!      do i = 2, nact
!        if ( abs(nc(i)-nc(i-1))>1.0d-10 ) nbad = i
!      end do
!      nact = nbad - 1
    end subroutine choosenprocs


!   Calculate a measure of the load imbalance of a super region.
!   The measure is a weighted sum taking into account the imbalance in the
!   number of points (computation) and the imbalance in the number of ghost
!   zones (communication). Ghost zones are added with a multiplication factor
!   of alpha.
    subroutine calc_imbalance ( sregion, imbalance )
!     The intent has been removed to make it compile with gfortran 4.1.
!      type(superregion2), pointer, intent(in) :: sregion
      type(superregion2), pointer :: sregion
      real(wp), intent(out) :: imbalance
      integer, dimension(3) :: cost
      real(wp) :: maxcost, maxgcost, n
      real(wp) :: idealcost, idealgcost

!     Initialize counter and accumulation variables
      n = 0.0_wp
      maxcost = 0.0_wp
      idealcost = 0.0_wp
      maxgcost = 0.0_wp
      idealgcost = 0.0_wp

!     Traverse the tree structure and add values for all leaves.
      call traverse_tree ( sregion )

!     Calculate the ideal cost
      idealcost = idealcost/n
      idealgcost = idealgcost/n

!     Calculate the imbalance
      imbalance = ( maxcost/idealcost - 1.0_wp ) &
                  + alpha * ( maxgcost/idealgcost - 1.0_wp )

      contains

!       Routine to traverse the tree and accumulate the cost of the leaves.
        recursive subroutine traverse_tree ( sreg )
!         The intent has been removed to make it compile with gfortran 4.1.
!          type(superregion2), pointer, intent(in) :: sreg
          type(superregion2), pointer :: sreg
          integer :: ich, nch, tcost, gcost

!         If the region has children recursively traverse the children.
          if ( associated(sreg%children) ) then
            nch = size(sreg%children)
            do ich = 1, nch
              call traverse_tree ( sreg%children(ich)%point )
            end do

!         Otherwise it's a leaf and we accumulate the cost and update the
!         maximum cost.
          else
            call cost_box ( sreg%extent, cost )
            tcost = product ( cost, mask = cost /= 0 )
            gcost = product ( cost+2*ghostsize, mask = cost /= 0 ) - tcost
            tcost = tcost
            gcost = gcost
            maxcost = max ( maxcost, tcost / sreg%frac )
            maxgcost = max ( maxgcost, gcost / sreg%frac )
            idealcost = idealcost + tcost
            idealgcost = idealgcost + gcost
            n = n + sreg%frac
          end if

        end subroutine traverse_tree

    end subroutine calc_imbalance


!   Assign processor numbers to the leaves in a tree starting from start_proc.
    subroutine AssignProcs ( minproc, maxproc, frac1, frac2, sregion )
      integer, intent(in) :: minproc, maxproc
      real(wp ), intent(in) :: frac1, frac2
!     The intent has been removed to make it compile with gfortran 4.1.
!      type(superregion2), pointer, intent(inout) :: sregion
      type(superregion2), pointer :: sregion

!     Initialize.
      if ( frac1 == 0 ) then
        procid = minproc
      else
        procid = minproc + 1
      end if

!     Traverse the tree.
      call traverse_treep ( sregion )
      
      contains

!       Routine to traverse the tree and assign processor ids to the leaves.
        recursive subroutine traverse_treep ( sreg )
!          The intent has been removed to make it compile with gfortran 4.1.
!          type(superregion2), pointer, intent(inout) :: sreg
          type(superregion2), pointer :: sreg
          integer :: ich, nch, maxcost, np
          integer, dimension(3) :: cost
          integer, dimension(1) :: mydim

!         If the region has children recursively traverse the children.
          if ( associated(sreg%children) ) then
            nch = size(sreg%children)
            do ich = 1, nch
              call traverse_treep ( sreg%children(ich)%point )
            end do

!         Otherwise it's a leaf and we assign the processor id and increment it.
          else
            if ( sreg%frac == 1.0_wp ) then
              sreg%processor = procid
              procid = procid + 1
            else
              if ( ( frac1 /= 0.0_wp ) .and. ( frac2 /= 0.0_wp ) ) then
                call cost_box ( sreg%extent, cost )
                maxcost = maxval(cost)
                mydim = maxloc (cost)
                np = nint(frac1/(frac1+frac2)*cost(mydim(1)))
                call split_sregion ( sreg, mydim(1), (/ np /) )
                sreg%processor = -1
                sreg%children(1)%point%processor = minproc
                sreg%children(1)%point%frac = frac1
                sreg%children(2)%point%processor = maxproc
                sreg%children(2)%point%frac = frac2
              else if ( frac1 == 0.0_wp ) then
                sreg%processor = maxproc
              else if ( frac2 == 0.0_wp ) then
                sreg%processor = minproc
              end if
              
            end if
          end if

        end subroutine traverse_treep

      end subroutine AssignProcs


!   Routine to sum up the load of an array of super regions. Return the result
!   in the pre-allocated array of length nprocs. The array should be
!   initialized to zero.
    subroutine calc_proc_load ( sregions, proc_load )
      type(ptr), dimension(:), intent(in) :: sregions
      integer, dimension(:), intent(inout) :: proc_load
      integer :: i

!     For each of the super regions in the array.
      do i = 1, size(sregions)

!       Traverse the tree and 
        call traverse_treel ( sregions(i)%point )
      end do

      contains

!       Routine to traverse the tree and add the load of each leaf to the
!       assigned processor.
        recursive subroutine traverse_treel ( sreg )
!         The intent has been removed to make it compile with gfortran 4.1.
!          type(superregion2), pointer, intent(in) :: sreg
          type(superregion2), pointer :: sreg
          integer, dimension(3) :: cost
          integer :: ich, nch

!         If the region has children recursively traverse the children.
          if ( associated(sreg%children) ) then
            nch = size(sreg%children)
            do ich = 1, nch
              call traverse_treel ( sreg%children(ich)%point )
            end do

!         Otherwise it's a leaf and we calculate the cost and accumulate it
!         into the correct spot in the proc_load array.

          else
            call cost_box ( sreg%extent, cost )
            proc_load(sreg%processor+1) = proc_load(sreg%processor+1) + &
                                          product ( cost )
          end if

        end subroutine traverse_treel

    end subroutine calc_proc_load


!   Split a superregion (newreg) recursively into smaller boxes while trying
!   to minimize the load imbalance. Here we allow for fractional processor
!   numbers, nprocs. The fraction that should be left unsplit is given by
!   fract (0.0<=fract<2). The inputs maxchoices and maxfactor are used to
!   determine how many options to examine at a given recursion level. The
!   variable clevel should be initialized to 0 and is used to keep track of
!   the recursion level. The number of options testes is counter and returned
!   in ncount.
    recursive subroutine SplitRegionsRecursive ( nprocs, fract, maxchoices, &
                                                 maxfactor, clevel, &
                                                 newreg, ncount )
      real(wp), intent(in) :: nprocs, fract
      integer, intent(in) :: maxchoices, maxfactor
      integer, intent(inout) :: clevel
!     The intent has been removed to make it compile with gfortran 4.1.
!      type(superregion2), pointer, intent(inout) :: newreg
      type(superregion2), pointer :: newreg
      integer, intent(inout) :: ncount
      type(bbox) :: reg
      integer, dimension(3) :: cost
      integer :: maxcost, ngood, i, np1, lo1, up1, str
      real(wp) :: p1, p2
      integer, dimension(1) :: mydim
      integer, dimension(maxchoices) :: npr
      real(wp), dimension(maxchoices) :: ncr
      type(ptr), dimension(maxchoices) :: newregarr
      real(wp), dimension(maxchoices) :: imbalancearr
      type(bbox) :: reg1, reg2
      integer, dimension(1) :: minimbalanceloc
      integer :: nlocal

!     Extract the bounding box information
      reg = newreg%extent

!     Make sure the array with the test super regions are nullified.
      do i = 1, maxchoices
        nullify ( newregarr(i)%point )
      end do

!     Initialize the imbalance array to max floating point value.
      imbalancearr(:) = minval(empty)

!     Increment the recursion level variable.
      clevel = clevel + 1

!     Calculate the number of choices to examine at the current recursion
!     level based on the value of maxchoices and maxfactor.
      nlocal = max ( maxchoices - ( clevel - 1)  / maxfactor, 1 )

!     If we are called with nprocs==max(1,fract) mark the region to be a leaf
!     in the tree, increment the choice counter and decrement the recursion
!     level counter and return.
      if (nprocs <= max(1.0_wp,fract+0.000001_wp) ) then
        ncount = ncount + 1
        clevel = clevel - 1
        newreg%processor = 1
        newreg%frac = nprocs
        return
      end if

!     Calculate the cost of the current region.
      call cost_box ( reg, cost )

!     Find the dimension with the most points.
      maxcost = maxval(cost)
      mydim = maxloc (cost)

!     Get the list of chunk options to investigate.
      call choosenprocs ( cost(mydim(1)), nprocs, fract, nlocal, ngood, &
                                                                 npr, ncr )

!     Loop over the chunk options to investigate.
      chunks: do i=1, ngood

!       Initialize the super region.
        allocate ( newregarr(i)%point )
        newregarr(i)%point = newreg

!       Find the processor numbers of the two chunks (p1 always an integer).
        p1 = npr(i)
        p2 = nprocs - npr(i)

!       Find the number of points in each of the 2 chunks while making
!       sure the chunks are not too small compared to the ghostsize if
!       limit_size is true.
!       Take also the granularity into account.
        np1 = nint((real(maxcost,wp)*p1)/nprocs/granularity)*granularity
!       TODO: Take granularity_boundary into account as well!
        if ( limit_size ) then
          np1 = max(ghostsize,np1)
          np1 = min(maxcost-ghostsize,np1)
        end if

!       Split the region (reg) into two regions (reg1, reg2) taking the
!       stride into account.
        reg1 = reg
        reg2 = reg
        lo1 = reg%dim(mydim(1))%lower
        up1 = reg%dim(mydim(1))%upper
        str = reg%dim(mydim(1))%stride
        reg1%dim(mydim(1))%upper = lo1 + (np1 - 1)*str
        reg2%dim(mydim(1))%lower = reg1%dim(mydim(1))%upper + str

!       Add the new leaves to the tree.
        call split_sregion ( newregarr(i)%point, mydim(1), (/ np1 /) )

!       Split region reg1 among p1 processors and region reg2 among p2
!       processors adding the new regions to the super region.
        call SplitRegionsRecursive ( p1, fract, maxchoices, maxfactor, &
                                     clevel, &
                                     newregarr(i)%point%children(1)%point, &
                                     ncount )
        call SplitRegionsRecursive ( p2, fract, maxchoices, maxfactor, &
                                     clevel, &
                                     newregarr(i)%point%children(2)%point, &
                                     ncount )

!       Calculate the load imbalance of the resulting super region.
        call calc_imbalance ( newregarr(i)%point, imbalancearr(i) )

!       If the balance is perfect exit the do loop and don't do any further
!       tests.
        if ( imbalancearr(i) == 0.0_wp ) then
          ngood = i
          exit
        end if
      end do chunks

!     Find the location of the minimum load imbalance.
      minimbalanceloc = minloc ( imbalancearr(1:ngood) )      

!     Insert the best choice into the tree structure      
      call point_to_children ( newreg,  newregarr(minimbalanceloc(1))%point )

!     Cut the connection between the best choice and its children.
      call disassociate ( newregarr(minimbalanceloc(1))%point )

!     Deallocate all the tested options.
      do i = 1, ngood
        call destroy_sregion ( newregarr(i)%point )
      end do

!     Decrement the recursion level counter.
      clevel = clevel - 1

    end subroutine SplitRegionsRecursive


!   Routine to return the union of 2 pre-sorted arrays of integers.
!   only n1 elements of list1 and n2 elements of list2 are actually used.
!   The number of elements in the union is n. The union is returned in list.
    subroutine union (n1, list1, n2, list2, n, list )
      integer, intent(in) :: n1, n2
      integer, dimension(:), intent(in) :: list1, list2
      integer, intent(out) :: n
      integer, dimension(:), intent(out) :: list
      integer :: j, k, newl

!     Initialize n and list to 0. The integers j and k are used to march through
!     the 2 input arrays and are initialized to 1 if the lists have some
!     valid elements and 0 otherwise.
      n = 0; list = 0; j = min(1,n1); k = min(1,n2)

!     Initialize the output array to 0
      list = 0

!     If both input arrays are 0, so are the output array and we are done.
      if (n1==0 .and. n2==0) then
        return
      end if

!     Repeat until j>n1 and k>n2.
      loop: do

!       If there are still valid elements in both list1 and list2...
        lists: if ( (j>0 .and. j<=n1) .and. (k>0 .and. k<=n2) ) then

!         The next element to add to list is the minimum of the current values
!         in list1 and list2
          newl = min ( list1(j), list2(k) )

!         If the element is not already in the output list....
          both: if ( n > 0 ) then
            if ( newl > list(n) ) then

!             Add the element
              n = n + 1
              list(n) = newl

!             If the added element comes from list1 increment j
              if ( list1(j) == list(n) ) j = j+1

!             If the added element comes from list2 increment j
              if ( list2(k) == list(n) ) k = k+1

            end if

!         Or if the output list is empty
          else both

!           Add the element
            n = n + 1
            list(n) = newl

!           If the added element comes from list1 increment j
            if ( list1(j) == list(n) ) j = j+1

!           If the added element comes from list2 increment j
            if ( list2(k) == list(n) ) k = k+1

          end if both

!       If there there is only valid elements left in list1
        else if (j>0 .and. j<=n1) then lists

!         Add the current element of list1 to list if it is not already there
!         or list is empty and increment j.
          newl = list1(j)
          list_one: if ( n > 0 ) then
            if ( newl > list(n) ) then
              n = n + 1
              list(n) = newl
              j = j+1
            end if
          else list_one
            n = n + 1
            list(n) = newl
            j = j+1
          end if list_one

!       If there there is only valid elements left in list2
        else if (k>0 .and. k<=n2) then lists

!         Add the current element of list2 to list if it is not already there
!         or list is empty and increment k.
          newl = list2(k)
          list_two: if ( n > 0 ) then
            if ( newl > list(n) ) then
              n = n + 1
              list(n) = newl
              k = k+1
            end if
          else list_two
            n = n + 1
            list(n) = newl
            k = k+1
          end if list_two
        end if lists

!       If both list1 and list2 have been fully traversed then exit.
        if ( j > n1 .and. k > n2 ) exit

      end do loop

    end subroutine union


!   Routine to figure out how many processors to assign to a list of super
!   regions and if necessary split super regions into regions to maintain load
!   balance across all regions. Returns a lits of potentially split super
!   regions and 2 arrays containing the minimum and maximum processor id
!   assigned to all leaf regions in the list of super region tree structures.
    subroutine SplitSuperRegions ( sregion, nprocs )
      type(ptr), dimension(:), intent(inout) :: sregion
      integer, intent(in) :: nprocs
      integer :: i, nregs, totcost, &
                 clevel, ncount, maxcost, np
      integer, dimension(1) :: mydir
      integer, dimension(:), allocatable :: cost
      integer, dimension(3) :: mycost
      integer, dimension(:,:), allocatable :: lcost
      real(wp) :: idealcost, nrprocs
      real(wp), dimension(:), allocatable :: nsplit, accumsplit, frac1, frac2
      real(wp) :: fract
      integer, dimension(:), allocatable :: accumcost, minmap, maxmap, &
                                            splitnum

!     Set nregs to the number of super regions in the input list.
      nregs = size(sregion)

!     Allocate helper arrays. 
      allocate ( cost(nregs), accumcost(nregs+1), minmap(nregs), &
                 maxmap(nregs), nsplit(nregs), accumsplit(nregs+1), &
                 frac1(nregs), frac2(nregs), lcost(nregs,3), splitnum(nregs) )

!     Set cost to the total cost of each super region and accumcost to the
!     accumulated cost.
      accumcost(1) = 0
      do i = 1, nregs
        call cost_box ( sregion(i)%point%extent, lcost(i,:) )
        cost(i) = product ( lcost(i,:) )
        accumcost(i+1) = accumcost(i) + cost(i)
      end do

!     Calculate the total cost (of all super regions) as well as the idealcost.
!     Note that totcost is an integer, while idealcost is real.
      totcost = sum ( cost )
      idealcost = real(totcost,wp) / nprocs

!     Calculate the ideal processor location for the beginning and end of each
!     super region, the fractional number of processors to be assigned to this
!     region as well as the accumulated fractional processor count.
      accumsplit(1) = 0.0_wp
      do i = 1, nregs
        minmap(i) = floor(accumcost(i)/idealcost) 
        maxmap(i) = ceiling(accumcost(i+1)/idealcost) - 1 
        nsplit(i) = cost(i)/idealcost
        accumsplit(i+1) = accumsplit(i)+nsplit(i)
      end do

!     Calculate the size of the fractional regions at the beginning and the
!     end of each super region.
      do i = 1, nregs
        if ( maxmap(i) > minmap(i) ) then
          frac1(i) = ceiling(accumsplit(i))-accumsplit(i)
          frac2(i) = accumsplit(i+1)-floor(accumsplit(i+1))
        else
          frac1(i) = 0.0_wp
          frac2(i) = 0.0_wp
        end if
      end do
 
!     Loop over the super regions.
      do i = 1, nregs
        nrprocs = nsplit(i)
!       If the region begins and ends on the same processor, don't split
!       but just assign the region to that proceesor.
        if ( maxmap(i) == minmap(i) ) then
          sregion(i)%point%processor = minmap(i)
          sregion(i)%point%frac = nrprocs
!       Otherwise split the region.
        else
!         Initialize counter variables used in the recursive routine.
          clevel = 0
          ncount = 0
!         Calculate the fractional part that has to be left unsplit. This is
!         the sum of the fractional leftovers at either end.
          fract = frac1(i)+frac2(i)
!         If nrprocs > fract (fuzzy logic) then split recursively on nrprocs
!         processors with fract left unsplit (0.0<fract<2.0) and then
!         assign processors to the resulting regions.
          if ( abs ( nrprocs -  fract ) > 0.000001_wp ) then
            call SplitRegionsRecursive ( nrprocs, fract, 4, 3, &
                    clevel, sregion(i)%point, ncount )
            call AssignProcs ( minmap(i), maxmap(i), frac1(i), frac2(i), &
                               sregion(i)%point )
!         Otherwise we just need to split the superregion into 2 regions
!         of size fract1 and fract2.
          else
!           Find the cost of the region.
            call cost_box ( sregion(i)%point%extent, mycost )
!           Find the direction of maximal cost.
            maxcost = maxval(mycost)
            mydir = maxloc (mycost)
!           Find the number of gridpoints in direction mydir of the first
!           split region.
            np = nint(frac1(i)/fract*mycost(mydir(1)))
!           Split the region.
            call split_sregion ( sregion(i)%point, mydir(1), (/ np /) )
!           Assign the processors to the resulting regions.
            sregion(i)%point%processor = -1
            sregion(i)%point%children(1)%point%processor = minmap(i)
            sregion(i)%point%children(1)%point%frac = frac1(i)
            sregion(i)%point%children(2)%point%processor = maxmap(i)
            sregion(i)%point%children(2)%point%frac = frac2(i)
          end if
        end if
      end do

!     Deallocate local helper arrays.
      deallocate ( cost, accumcost, minmap, &
                 maxmap, nsplit, accumsplit, &
                 frac1, frac2, lcost, splitnum )
 
    end subroutine SplitSuperRegions

end module carpet_boxtypes