diff options
Diffstat (limited to 'Carpet/Carpet/src/LoadBalanceReal/carpet_boxtypes.F90')
-rw-r--r-- | Carpet/Carpet/src/LoadBalanceReal/carpet_boxtypes.F90 | 776 |
1 files changed, 776 insertions, 0 deletions
diff --git a/Carpet/Carpet/src/LoadBalanceReal/carpet_boxtypes.F90 b/Carpet/Carpet/src/LoadBalanceReal/carpet_boxtypes.F90 new file mode 100644 index 000000000..a03d312c6 --- /dev/null +++ b/Carpet/Carpet/src/LoadBalanceReal/carpet_boxtypes.F90 @@ -0,0 +1,776 @@ +#include "cctk.h" + +module carpet_boxtypes + + use carpet_region + implicit none + + integer :: ghostsize + real(wp) :: alpha + logical :: limit_size + integer :: granularity + integer :: granularity_boundary + integer :: procid + + contains + +! The basic calculation of the cost in each of the 3 directions. + subroutine cost_box ( reg, cost ) + type(bbox), intent(in) :: reg + integer, dimension(3), intent(out) :: cost + integer :: i + + do i = 1, 3 + cost(i) = (reg%dim(i)%upper - reg%dim(i)%lower)/reg%dim(i)%stride + 1 + end do + end subroutine cost_box + + +! Find a sorted list of the best options for splitting npoints in 2 chunks +! among nprocs processors. The fractional remainder is passed in through +! fract. The maximum allowed number of options to return is nmax, and the +! actual number of returned options is nact. The result is returned in +! np (number of procs for the first chunk) and nc (a measure of the +! imabalance of the split. + subroutine choosenprocs ( npoints, nprocs, fract, nmax, nact, np, nc ) + integer, intent(in) :: npoints, nmax + real(wp), intent(in) :: nprocs, fract + integer, intent(out) :: nact + integer, dimension(:), intent(out) :: np + real(wp), dimension(:), intent(out) :: nc + integer :: idealcost + integer :: quot, i, j, round, ind + real(wp) :: c1, c2, currentimbal + logical :: exchange + +! The idealcost should be npoints/nprocs but I rescale with nprocs for +! The purpose of avoiding some problems with floating point comparisons. + idealcost = npoints + +! Initialize the cost to the maximum possible floating point number. + nc = minval( empty ) + +! initialize the actual returned number of options to 0. + nact = 0 + +! Initialize an integer index to zero. Used to keep track where the current +! option should be inserted in the result vector. + ind = 1 + +! Calculate the starting number of processors to start the search with +! (quot) as the largest integer smaller than nprocs. + quot = floor(nprocs) +! If the fractional part we have to leave unsplit is larger than 1 +! decrement quot by 1. + if ( fract > 1.0_wp ) quot = quot-1 +! If nprocs has zero fractional part, we can start at quot/2 due to +! symmetry. + if ( .not. ( nprocs>quot ) ) then + quot = quot / 2 + end if + +! Try all options until reaching a single processor. + options: do i=quot, 1, -1 + +! Calculate the number of points that will yield the smallest imbalance. + round = nint(real(npoints,wp)/nprocs*i) + +! Calculate the imbalance for the current choice of processors. +! This has been scaled by nprocs (like idealcost has been above). + c1 = abs ( (round*nprocs)/i - idealcost ) + c2 = abs ( (npoints-round)/(nprocs-i)*nprocs - idealcost ) + currentimbal = max ( c1, c2 ) + +! Initialize the boolean exchange that keeps track of whether the current +! choice needs to replace an entry in the return vectors. + exchange = .false. + +! Start from the current number of entries in the return vector and check +! if the current choice is better than any of them. If so indicate that +! an exchange has to happen and record the index. + do j = nact, 1, -1 + if ( currentimbal < nc(j) ) then + exchange = .true. + ind = j + end if + end do + +! If an exchange has to happen, shift all elements with indices higher +! than or equal to ind to the right. + if (exchange) then + np(ind:nmax) = eoshift(np(ind:nmax),-1) + nc(ind:nmax) = eoshift(nc(ind:nmax),-1) + end if + +! If we don't need to do an exchange, and if nact<nmax add the current +! case after the previous entries. + if (nact<nmax .and. .not.exchange) then + nact = nact+1 + np(nact) = i + nc(nact) = currentimbal + end if + +! If the return vectors are empty or an exhange has to be done, insert +! the current choice at the appropriate position (ind) and increment the +! number of elements in the return vector (capped at nmax) + if (nact == 0 .or. exchange) then + nact = min(nact+1,nmax) + np(ind) = i + nc(ind) = currentimbal + end if + end do options + +! This is commented out for now. Used to return only the elements that are +! as good as the best case. +! nbad = nact + 1 +! do i = 2, nact +! if ( abs(nc(i)-nc(i-1))>1.0d-10 ) nbad = i +! end do +! nact = nbad - 1 + end subroutine choosenprocs + + +! Calculate a measure of the load imbalance of a super region. +! The measure is a weighted sum taking into account the imbalance in the +! number of points (computation) and the imbalance in the number of ghost +! zones (communication). Ghost zones are added with a multiplication factor +! of alpha. + subroutine calc_imbalance ( sregion, imbalance ) +! The intent has been removed to make it compile with gfortran 4.1. +! type(superregion2), pointer, intent(in) :: sregion + type(superregion2), pointer :: sregion + real(wp), intent(out) :: imbalance + integer, dimension(3) :: cost + real(wp) :: maxcost, maxgcost, n + real(wp) :: idealcost, idealgcost + +! Initialize counter and accumulation variables + n = 0.0_wp + maxcost = 0.0_wp + idealcost = 0.0_wp + maxgcost = 0.0_wp + idealgcost = 0.0_wp + +! Traverse the tree structure and add values for all leaves. + call traverse_tree ( sregion ) + +! Calculate the ideal cost + idealcost = idealcost/n + idealgcost = idealgcost/n + +! Calculate the imbalance + imbalance = ( maxcost/idealcost - 1.0_wp ) & + + alpha * ( maxgcost/idealgcost - 1.0_wp ) + + contains + +! Routine to traverse the tree and accumulate the cost of the leaves. + recursive subroutine traverse_tree ( sreg ) +! The intent has been removed to make it compile with gfortran 4.1. +! type(superregion2), pointer, intent(in) :: sreg + type(superregion2), pointer :: sreg + integer :: ich, nch, tcost, gcost + +! If the region has children recursively traverse the children. + if ( associated(sreg%children) ) then + nch = size(sreg%children) + do ich = 1, nch + call traverse_tree ( sreg%children(ich)%point ) + end do + +! Otherwise it's a leaf and we accumulate the cost and update the +! maximum cost. + else + call cost_box ( sreg%extent, cost ) + tcost = product ( cost, mask = cost /= 0 ) + gcost = product ( cost+2*ghostsize, mask = cost /= 0 ) - tcost + tcost = tcost + gcost = gcost + maxcost = max ( maxcost, tcost / sreg%frac ) + maxgcost = max ( maxgcost, gcost / sreg%frac ) + idealcost = idealcost + tcost + idealgcost = idealgcost + gcost + n = n + sreg%frac + end if + + end subroutine traverse_tree + + end subroutine calc_imbalance + + +! Assign processor numbers to the leaves in a tree starting from start_proc. + subroutine AssignProcs ( minproc, maxproc, frac1, frac2, sregion ) + integer, intent(in) :: minproc, maxproc + real(wp ), intent(in) :: frac1, frac2 + logical :: lower_is_outer, upper_is_outer + integer :: boundarysize, minsize +! The intent has been removed to make it compile with gfortran 4.1. +! type(superregion2), pointer, intent(inout) :: sregion + type(superregion2), pointer :: sregion + character :: msg*200 + +! Initialize. + if ( frac1 == 0 ) then + procid = minproc + else + procid = minproc + 1 + end if + +! Traverse the tree. + call traverse_treep ( sregion ) + + contains + +! Routine to traverse the tree and assign processor ids to the leaves. + recursive subroutine traverse_treep ( sreg ) +! The intent has been removed to make it compile with gfortran 4.1. +! type(superregion2), pointer, intent(inout) :: sreg + type(superregion2), pointer :: sreg + integer :: ich, nch, maxcost, np + integer, dimension(3) :: cost + integer, dimension(1) :: mydim + +! If the region has children recursively traverse the children. + if ( associated(sreg%children) ) then + nch = size(sreg%children) + do ich = 1, nch + call traverse_treep ( sreg%children(ich)%point ) + end do + +! Otherwise it's a leaf and we assign the processor id and increment it. + else + if ( sreg%frac == 1.0_wp ) then + sreg%processor = procid + procid = procid + 1 + else + call cost_box ( sreg%extent, cost ) + maxcost = maxval(cost) + mydim = maxloc (cost) + lower_is_outer = sreg%outer_boundaries%obound(mydim(1),1)/=0 + upper_is_outer = sreg%outer_boundaries%obound(mydim(1),2)/=0 + if (frac1 == 0.0_wp) then + np = 0 + else if (frac2 == 0.0_wp) then + np = maxcost + else + np = nint(frac1/(frac1+frac2)*maxcost) + end if + if (limit_size) then + boundarysize = ghostsize + minsize = boundarysize+ghostsize + if (lower_is_outer .and. np>0 .and. np<minsize) then + if (np<minsize/2) then + np = 0 + else + np = minsize + end if + end if + if (upper_is_outer .and. & + np<maxcost .and. np>maxcost-minsize) then + if (np>maxcost-minsize/2) then + np = maxcost + else + np = maxcost-minsize + end if + end if + end if + if ((lower_is_outer .and. np>0 .and. np<minsize) .or. & + (upper_is_outer .and. & + np<maxcost .and. np>maxcost-minsize)) then + write (msg, '("Not enough grid points: np=",i4," gs=",i4, "maxcost=",i4)') & + np, boundarysize, maxcost + call CCTK_WARN(CCTK_WARN_ABORT, msg) + end if + if (np>0 .and. np<maxcost) then + call split_sregion ( sreg, mydim(1), (/ np /) ) + sreg%processor = -1 + sreg%children(1)%point%processor = minproc + sreg%children(1)%point%frac = frac1 + sreg%children(2)%point%processor = maxproc + sreg%children(2)%point%frac = frac2 + else if (np==0) then + sreg%processor = maxproc + else if (np==maxcost) then + sreg%processor = minproc + end if + + end if + end if + + end subroutine traverse_treep + + end subroutine AssignProcs + + +! Routine to sum up the load of an array of super regions. Return the result +! in the pre-allocated array of length nprocs. The array should be +! initialized to zero. + subroutine calc_proc_load ( sregions, proc_load ) + type(ptr), dimension(:), intent(in) :: sregions + integer, dimension(:), intent(inout) :: proc_load + integer :: i + +! For each of the super regions in the array. + do i = 1, size(sregions) + +! Traverse the tree and + call traverse_treel ( sregions(i)%point ) + end do + + contains + +! Routine to traverse the tree and add the load of each leaf to the +! assigned processor. + recursive subroutine traverse_treel ( sreg ) +! The intent has been removed to make it compile with gfortran 4.1. +! type(superregion2), pointer, intent(in) :: sreg + type(superregion2), pointer :: sreg + integer, dimension(3) :: cost + integer :: ich, nch + +! If the region has children recursively traverse the children. + if ( associated(sreg%children) ) then + nch = size(sreg%children) + do ich = 1, nch + call traverse_treel ( sreg%children(ich)%point ) + end do + +! Otherwise it's a leaf and we calculate the cost and accumulate it +! into the correct spot in the proc_load array. + + else + call cost_box ( sreg%extent, cost ) + proc_load(sreg%processor+1) = proc_load(sreg%processor+1) + & + product ( cost ) + end if + + end subroutine traverse_treel + + end subroutine calc_proc_load + + +! Split a superregion (newreg) recursively into smaller boxes while trying +! to minimize the load imbalance. Here we allow for fractional processor +! numbers, nprocs. The fraction that should be left unsplit is given by +! fract (0.0<=fract<2). The inputs maxchoices and maxfactor are used to +! determine how many options to examine at a given recursion level. The +! variable clevel should be initialized to 0 and is used to keep track of +! the recursion level. The number of options testes is counter and returned +! in ncount. + recursive subroutine SplitRegionsRecursive ( nprocs, fract, maxchoices, & + maxfactor, clevel, & + newreg, ncount ) + real(wp), intent(in) :: nprocs, fract + integer, intent(in) :: maxchoices, maxfactor + integer, intent(inout) :: clevel +! The intent has been removed to make it compile with gfortran 4.1. +! type(superregion2), pointer, intent(inout) :: newreg + type(superregion2), pointer :: newreg + integer, intent(inout) :: ncount + type(bbox) :: reg + integer, dimension(3) :: cost + integer :: maxcost, ngood, i, np1, lo1, up1, str + real(wp) :: p1, p2 + integer, dimension(1) :: mydim + integer, dimension(maxchoices) :: npr + real(wp), dimension(maxchoices) :: ncr + type(ptr), dimension(maxchoices) :: newregarr + real(wp), dimension(maxchoices) :: imbalancearr + logical :: lower_is_outer, upper_is_outer + integer :: boundarysize, minsize + type(bbox) :: reg1, reg2 + integer, dimension(1) :: minimbalanceloc + integer :: nlocal + character :: msg*200 + +! Extract the bounding box information + reg = newreg%extent + +! Make sure the array with the test super regions are nullified. + do i = 1, maxchoices + nullify ( newregarr(i)%point ) + end do + +! Initialize the imbalance array to max floating point value. + imbalancearr(:) = minval(empty) + +! Increment the recursion level variable. + clevel = clevel + 1 + +! Calculate the number of choices to examine at the current recursion +! level based on the value of maxchoices and maxfactor. + nlocal = max ( maxchoices - ( clevel - 1) / maxfactor, 1 ) + +! If we are called with nprocs==max(1,fract) mark the region to be a leaf +! in the tree, increment the choice counter and decrement the recursion +! level counter and return. + if (nprocs <= max(1.0_wp,fract+0.000001_wp) ) then + ncount = ncount + 1 + clevel = clevel - 1 + newreg%processor = 1 + newreg%frac = nprocs + return + end if + +! Calculate the cost of the current region. + call cost_box ( reg, cost ) + +! Find the dimension with the most points. + maxcost = maxval(cost) + mydim = maxloc (cost) + +! Get the list of chunk options to investigate. + call choosenprocs ( cost(mydim(1)), nprocs, fract, nlocal, ngood, & + npr, ncr ) + +! Loop over the chunk options to investigate. + chunks: do i=1, ngood + +! Initialize the super region. + allocate ( newregarr(i)%point ) + newregarr(i)%point = newreg + +! Find the processor numbers of the two chunks (p1 always an integer). + p1 = npr(i) + p2 = nprocs - npr(i) + +! Find the number of points in each of the 2 chunks while making +! sure the chunks are not too small compared to the ghostsize if +! limit_size is true. +! Take also the granularity into account. +! At the outer boundary, take also the boundary size into account, +! assuming that the boundary size is equal to the ghost size. +! (Boundary points cannot be split, and near a boundary, the +! minimum number of interior points is the number of ghost points.) + np1 = nint((real(maxcost,wp)*p1)/nprocs/granularity)*granularity +! TODO: Take granularity_boundary into account as well! +! TODO: Take granularity into account when limiting! + if (limit_size) then + lower_is_outer = & + newregarr(i)%point%outer_boundaries%obound(mydim(1),1)/=0 + upper_is_outer = & + newregarr(i)%point%outer_boundaries%obound(mydim(1),2)/=0 + boundarysize = ghostsize + minsize = boundarysize+ghostsize + if (lower_is_outer .and. np1>0 .and. np1<minsize) then + np1 = minsize + end if + if (upper_is_outer .and. np1<maxcost .and. np1>maxcost-minsize) then + np1 = maxcost-minsize + end if + if ((lower_is_outer .and. np1>0 .and. np1<minsize) .or. & + (upper_is_outer .and. np1<maxcost .and. np1>maxcost-minsize)) then + write (msg, '("Not enough grid points: np=",i4," gs=",i4, "maxcost=",i4)') & + np1, boundarysize, maxcost + call CCTK_WARN(CCTK_WARN_ABORT, msg) + end if + end if + +! Split the region (reg) into two regions (reg1, reg2) taking the +! stride into account. + reg1 = reg + reg2 = reg + lo1 = reg%dim(mydim(1))%lower + up1 = reg%dim(mydim(1))%upper + str = reg%dim(mydim(1))%stride + reg1%dim(mydim(1))%upper = lo1 + (np1 - 1)*str + reg2%dim(mydim(1))%lower = reg1%dim(mydim(1))%upper + str + +! Add the new leaves to the tree. + call split_sregion ( newregarr(i)%point, mydim(1), (/ np1 /) ) + +! Split region reg1 among p1 processors and region reg2 among p2 +! processors adding the new regions to the super region. + call SplitRegionsRecursive ( p1, fract, maxchoices, maxfactor, & + clevel, & + newregarr(i)%point%children(1)%point, & + ncount ) + call SplitRegionsRecursive ( p2, fract, maxchoices, maxfactor, & + clevel, & + newregarr(i)%point%children(2)%point, & + ncount ) + +! Calculate the load imbalance of the resulting super region. + call calc_imbalance ( newregarr(i)%point, imbalancearr(i) ) + +! If the balance is perfect exit the do loop and don't do any further +! tests. + if ( imbalancearr(i) == 0.0_wp ) then + ngood = i + exit + end if + end do chunks + +! Find the location of the minimum load imbalance. + minimbalanceloc = minloc ( imbalancearr(1:ngood) ) + +! Insert the best choice into the tree structure + call point_to_children ( newreg, newregarr(minimbalanceloc(1))%point ) + +! Cut the connection between the best choice and its children. + call disassociate ( newregarr(minimbalanceloc(1))%point ) + +! Deallocate all the tested options. + do i = 1, ngood + call destroy_sregion ( newregarr(i)%point ) + end do + +! Decrement the recursion level counter. + clevel = clevel - 1 + + end subroutine SplitRegionsRecursive + + +! Routine to return the union of 2 pre-sorted arrays of integers. +! only n1 elements of list1 and n2 elements of list2 are actually used. +! The number of elements in the union is n. The union is returned in list. + subroutine union (n1, list1, n2, list2, n, list ) + integer, intent(in) :: n1, n2 + integer, dimension(:), intent(in) :: list1, list2 + integer, intent(out) :: n + integer, dimension(:), intent(out) :: list + integer :: j, k, newl + +! Initialize n and list to 0. The integers j and k are used to march through +! the 2 input arrays and are initialized to 1 if the lists have some +! valid elements and 0 otherwise. + n = 0; list = 0; j = min(1,n1); k = min(1,n2) + +! Initialize the output array to 0 + list = 0 + +! If both input arrays are 0, so are the output array and we are done. + if (n1==0 .and. n2==0) then + return + end if + +! Repeat until j>n1 and k>n2. + loop: do + +! If there are still valid elements in both list1 and list2... + lists: if ( (j>0 .and. j<=n1) .and. (k>0 .and. k<=n2) ) then + +! The next element to add to list is the minimum of the current values +! in list1 and list2 + newl = min ( list1(j), list2(k) ) + +! If the element is not already in the output list.... + both: if ( n > 0 ) then + if ( newl > list(n) ) then + +! Add the element + n = n + 1 + list(n) = newl + +! If the added element comes from list1 increment j + if ( list1(j) == list(n) ) j = j+1 + +! If the added element comes from list2 increment j + if ( list2(k) == list(n) ) k = k+1 + + end if + +! Or if the output list is empty + else both + +! Add the element + n = n + 1 + list(n) = newl + +! If the added element comes from list1 increment j + if ( list1(j) == list(n) ) j = j+1 + +! If the added element comes from list2 increment j + if ( list2(k) == list(n) ) k = k+1 + + end if both + +! If there there is only valid elements left in list1 + else if (j>0 .and. j<=n1) then lists + +! Add the current element of list1 to list if it is not already there +! or list is empty and increment j. + newl = list1(j) + list_one: if ( n > 0 ) then + if ( newl > list(n) ) then + n = n + 1 + list(n) = newl + j = j+1 + end if + else list_one + n = n + 1 + list(n) = newl + j = j+1 + end if list_one + +! If there there is only valid elements left in list2 + else if (k>0 .and. k<=n2) then lists + +! Add the current element of list2 to list if it is not already there +! or list is empty and increment k. + newl = list2(k) + list_two: if ( n > 0 ) then + if ( newl > list(n) ) then + n = n + 1 + list(n) = newl + k = k+1 + end if + else list_two + n = n + 1 + list(n) = newl + k = k+1 + end if list_two + end if lists + +! If both list1 and list2 have been fully traversed then exit. + if ( j > n1 .and. k > n2 ) exit + + end do loop + + end subroutine union + + +! Routine to figure out how many processors to assign to a list of super +! regions and if necessary split super regions into regions to maintain load +! balance across all regions. Returns a lits of potentially split super +! regions and 2 arrays containing the minimum and maximum processor id +! assigned to all leaf regions in the list of super region tree structures. + subroutine SplitSuperRegions ( sregion, nprocs ) + type(ptr), dimension(:), intent(inout) :: sregion + integer, intent(in) :: nprocs + integer :: i, nregs, totcost, & + clevel, ncount, maxcost, np + logical :: lower_is_outer, upper_is_outer + integer :: boundarysize, minsize + integer, dimension(1) :: mydir + integer, dimension(:), allocatable :: cost + integer, dimension(3) :: mycost + integer, dimension(:,:), allocatable :: lcost + real(wp) :: idealcost, nrprocs + real(wp), dimension(:), allocatable :: nsplit, accumsplit, frac1, frac2 + real(wp) :: fract + integer, dimension(:), allocatable :: accumcost, minmap, maxmap, & + splitnum + character :: msg*200 + +! Set nregs to the number of super regions in the input list. + nregs = size(sregion) + +! Allocate helper arrays. + allocate ( cost(nregs), accumcost(nregs+1), minmap(nregs), & + maxmap(nregs), nsplit(nregs), accumsplit(nregs+1), & + frac1(nregs), frac2(nregs), lcost(nregs,3), splitnum(nregs) ) + +! Set cost to the total cost of each super region and accumcost to the +! accumulated cost. + accumcost(1) = 0 + do i = 1, nregs + call cost_box ( sregion(i)%point%extent, lcost(i,:) ) + cost(i) = product ( lcost(i,:) ) + accumcost(i+1) = accumcost(i) + cost(i) + end do + +! Calculate the total cost (of all super regions) as well as the idealcost. +! Note that totcost is an integer, while idealcost is real. + totcost = sum ( cost ) + idealcost = real(totcost,wp) / nprocs + +! Calculate the ideal processor location for the beginning and end of each +! super region, the fractional number of processors to be assigned to this +! region as well as the accumulated fractional processor count. + accumsplit(1) = 0.0_wp + do i = 1, nregs + minmap(i) = floor(accumcost(i)/idealcost) + maxmap(i) = ceiling(accumcost(i+1)/idealcost) - 1 + nsplit(i) = cost(i)/idealcost + accumsplit(i+1) = accumsplit(i)+nsplit(i) + end do + +! Calculate the size of the fractional regions at the beginning and the +! end of each super region. + do i = 1, nregs + if ( maxmap(i) > minmap(i) ) then + frac1(i) = ceiling(accumsplit(i))-accumsplit(i) + frac2(i) = accumsplit(i+1)-floor(accumsplit(i+1)) + else + frac1(i) = 0.0_wp + frac2(i) = 0.0_wp + end if + end do + +! Loop over the super regions. + do i = 1, nregs + nrprocs = nsplit(i) +! If the region begins and ends on the same processor, don't split +! but just assign the region to that processor. + if ( maxmap(i) == minmap(i) ) then + sregion(i)%point%processor = minmap(i) + sregion(i)%point%frac = nrprocs +! Otherwise split the region. + else +! Initialize counter variables used in the recursive routine. + clevel = 0 + ncount = 0 +! Calculate the fractional part that has to be left unsplit. This is +! the sum of the fractional leftovers at either end. + fract = frac1(i)+frac2(i) +! If nrprocs > fract (fuzzy logic) then split recursively on nrprocs +! processors with fract left unsplit (0.0<fract<2.0) and then +! assign processors to the resulting regions. + if ( abs ( nrprocs - fract ) > 0.000001_wp ) then + call SplitRegionsRecursive ( nrprocs, fract, 4, 3, & + clevel, sregion(i)%point, ncount ) + call AssignProcs ( minmap(i), maxmap(i), frac1(i), frac2(i), & + sregion(i)%point ) +! Otherwise we just need to split the superregion into 2 regions +! of size fract1 and fract2. + else +! Find the cost of the region. + call cost_box ( sregion(i)%point%extent, mycost ) +! Find the direction of maximal cost. + maxcost = maxval(mycost) + mydir = maxloc (mycost) +! Find the number of gridpoints in direction mydir of the first +! split region. + np = nint(frac1(i)/fract*mycost(mydir(1))) + if (limit_size) then + lower_is_outer = & + sregion(i)%point%outer_boundaries%obound(mydir(1),1)/=0 + upper_is_outer = & + sregion(i)%point%outer_boundaries%obound(mydir(1),2)/=0 + boundarysize = ghostsize + minsize = boundarysize+ghostsize + if (lower_is_outer .and. np>0 .and. np<minsize) then + np = minsize + end if + if (upper_is_outer .and. np<maxcost .and. np>maxcost-minsize) then + np = maxcost-minsize + end if + if ((lower_is_outer .and. np>0 .and. np<minsize) .or. & + (upper_is_outer .and. & + np<maxcost .and. np>maxcost-minsize)) then + write (msg, '("Not enough grid points: np=",i4," gs=",i4, "maxcost=",i4)') & + np, boundarysize, maxcost + call CCTK_WARN(CCTK_WARN_ABORT, msg) + end if + end if +! Split the region. + call split_sregion ( sregion(i)%point, mydir(1), (/ np /) ) +! Assign the processors to the resulting regions. + sregion(i)%point%processor = -1 + sregion(i)%point%children(1)%point%processor = minmap(i) + sregion(i)%point%children(1)%point%frac = frac1(i) + sregion(i)%point%children(2)%point%processor = maxmap(i) + sregion(i)%point%children(2)%point%frac = frac2(i) + end if + end if + end do + +! Deallocate local helper arrays. + deallocate ( cost, accumcost, minmap, & + maxmap, nsplit, accumsplit, & + frac1, frac2, lcost, splitnum ) + + end subroutine SplitSuperRegions + +end module carpet_boxtypes |