Specialized routine that returns the number of determinants that are going into the core-space on this processor. Once the leading determinants have been obtained on each processor, this function requires the minimal and maximal populations among these for all processors (requires previous MPI_Gather), then the procedure to determine the core-space size on this processor is as follows: 1) Get the minimum of the maximal populations, then count the number of determinants above this population. This count is then broadcasted to the other procs, and the total number of determinants above the smallest maximum is determined. If it is smaller than the core-space size, these determinants are put into the core-space, else we repeat with the second smallest of the maximal populations, and so on. 2) Get the maximum of the minimal populations, then count the number of determinants below this population. This count is then broadcasted to the other procs, and the total number of determinants below the largest minimum is determined. If the number of determinants that are remaining (i.e. larger than the largest minimum and smaller than the smallest maximum) is sufficient to fill up the core-space (in particular, the smallest maximum has to be bigger than the largest minimum), the small determinants are discarded. Else, we repeat this with the second largest minimum, and so on. 3) From the remaining determinants, each processor contributes a share that equals to the ratio of the remaining determinants on this proc to the total remaining determinants
| Type | Intent | Optional | Attributes | Name | ||
|---|---|---|---|---|---|---|
| integer, | intent(in) | :: | n_keep |
core-space size |
||
| real(kind=dp), | intent(inout) | :: | min_vals(0:nProcessors-1) | |||
| real(kind=dp), | intent(inout) | :: | max_vals(0:nProcessors-1) | |||
| integer, | intent(in) | :: | lengths(0:nProcessors-1) |
core-space size |
||
| real(kind=dp), | intent(in) | :: | list(:) | |||
| integer, | intent(out) | :: | n_dets_this_proc |
number of core-space determinants on this processor |
subroutine return_proc_share(n_keep, min_vals, max_vals, lengths, list, n_dets_this_proc) use util_mod, only: binary_search_first_ge real(dp), intent(inout) :: max_vals(0:nProcessors - 1), min_vals(0:nProcessors - 1) real(dp), intent(in) :: list(:) integer, intent(in) :: n_keep, lengths(0:nProcessors - 1) integer, intent(out) :: n_dets_this_proc integer :: sum_max, sum_min real(dp) :: min_max, max_min integer :: dets_left, pool_left real(dp) :: total_pool, ip_ratio integer :: n_max(0:nProcessors - 1), n_min(0:nProcessors - 1) integer :: missing, n_full, i dets_left = -1 ! Increase the cutoff until our selection is small enough do while (dets_left < 0) call get_pp_ex(min_max, n_max, sum_max, max_vals) ! Number of determinants left when keeping the maximal ones dets_left = n_keep - sum_max end do ! Definitely take these determinants n_dets_this_proc = n_max(iProcIndex) max_min = min_max + 1 total_pool = -1 ! Reduce the cutoff until we are below the min_max do while (max_min > min_max .or. total_pool < dets_left) call get_pp_ex(max_min, n_min, sum_min, min_vals, t_max=.true.) ! Size of the pool left when not keeping the minimal ones ( has to be at least ! big enough to fill the core-space) total_pool = sum(lengths) - sum_max - sum_min end do ! If the corespace consists of all chosen determinants, the remaining pool might be 0 ! -> no further action, take all determinants if (total_pool > 0) then ! Number of available dets on this proc after removing min/max pool_left = lengths(iProcIndex) - n_max(iProcIndex) - n_min(iProcIndex) ! Ratio of available dets on this proc vs. in totap ip_ratio = pool_left / real(total_pool, dp) ! If any further dets have to be picked, get them from all procs weighted with the pool sizes n_dets_this_proc = n_dets_this_proc + int(ip_ratio * dets_left) end if contains subroutine get_pp_ex(ex, n_ex, sum_ex, vals, t_max) use Parallel_neci, only: MPIAllGather real(dp), intent(out) :: ex integer, intent(out) :: n_ex(0:nProcessors - 1), sum_ex real(dp), intent(inout) :: vals(0:nProcessors - 1) logical, intent(in), optional :: t_max integer :: ex_ind, n_ex_loc integer :: ierr real(dp) :: pre logical :: t_max_ def_default(t_max_, t_max, .false.) if (t_max_) then pre = -1.0 else pre = 1.0 end if ! Get the smallest value of the per-proc max ex_ind = minloc(pre * vals, dim=1) - 1 ex = vals(ex_ind) ! Invalidate this value, such that the next call finds the second smallest value and so on vals(ex_ind) = pre * sum(abs(vals)) ! Now, get the location of the first element above the extremum if (size(list) > 0) then n_ex_loc = binary_search_first_ge(list, ex) else ! it might be possible that a proc is empty (has no candidates) ! in this case, never find anything above/below global extremal values n_ex_loc = -1 end if ! If no such element exists, return 0 on this proc if (n_ex_loc < 0) then n_ex_loc = 0 else if (t_max_) then ! From the position, get the number of elements below the extremum (for max_min) n_ex_loc = n_ex_loc - 1 else ! Or above the extremum (for min_max) n_ex_loc = lengths(iProcIndex) - n_ex_loc + 1 end if call MPIAllGather(n_ex_loc, n_ex, ierr) ! Check if the maximum pop is already sufficient sum_ex = sum(n_ex) end subroutine get_pp_ex end subroutine return_proc_share