generate_space_most_populated Subroutine

public subroutine generate_space_most_populated(target_space_size, tApproxSpace, nApproxSpace, ilut_list, space_size, run, opt_source, opt_source_size, t_opt_fast_core)

Arguments

Type IntentOptional Attributes Name
integer, intent(in) :: target_space_size
logical, intent(in) :: tApproxSpace
integer, intent(in) :: nApproxSpace
integer(kind=n_int), intent(inout) :: ilut_list(0:,:)
integer, intent(inout) :: space_size
integer, intent(in) :: run
integer(kind=n_int), intent(in), optional :: opt_source(0:,:)
integer(kind=n_int), intent(in), optional :: opt_source_size
logical, intent(in), optional :: t_opt_fast_core

Contents


Source Code

    subroutine generate_space_most_populated(target_space_size, tApproxSpace, &
                                             nApproxSpace, ilut_list, space_size, run, opt_source, opt_source_size, t_opt_fast_core)

        ! In: target_space_size - The number of determinants to attempt to keep
        !         from if less determinants are present then use all of them.
        ! In: tApproxSpace - If true then only find *approximately* the best
        !         space, to save memory (although in many cases it will end up
        !         finding the best space).
        ! In: nApproxSpace - factor that defines how many states are kept on each
        !         process if tApproxSpace is true, 1 =< nApproxSpace =< nProcessors.
        !         The larger nApproxSpace, the more memory is consumed and the slower
        !         (but more accurate) the semi-stochastic initialisation is.
        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output space_size will equal the total number of
        !             generated plus what space_size was on input.

        integer, intent(in) :: target_space_size, nApproxSpace
        integer(n_int), intent(in), optional :: opt_source_size
        integer(n_int), intent(in), optional :: opt_source(0:, :)
        logical, intent(in) :: tApproxSpace
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size
        integer, intent(in) :: run
        logical, intent(in), optional :: t_opt_fast_core
        real(dp), allocatable, dimension(:) :: amps_this_proc, amps_all_procs
        real(dp) :: real_sign(lenof_sign)
        integer(MPIArg) :: length_this_proc, total_length
        integer(MPIArg) :: lengths(0:nProcessors - 1), disps(0:nProcessors - 1)
        integer(n_int) :: temp_ilut(0:NIfTot)
        real(dp) :: core_sum, all_core_sum
        integer(n_int), dimension(:, :), allocatable :: largest_states
        integer, allocatable, dimension(:) :: indices_to_keep
        integer :: j, ierr, ind, n_pops_keep, n_states_this_proc
        integer(int64) :: i
        integer(TagIntType) :: TagA, TagB, TagC, TagD
        character(len=*), parameter :: this_routine = "generate_space_most_populated"
        integer :: nzero_dets
        real(dp) :: max_vals(0:nProcessors - 1), min_vals(0:nProcessors - 1), max_sign, min_sign
        integer(int64) :: source_size
        integer(n_int), allocatable :: loc_source(:, :)
        integer(MPIARg) :: max_size
        logical :: t_use_fast_pops_core

        if (present(opt_source)) then
            ASSERT(present(opt_source_size))
            source_size = int(opt_source_size, int64)

            allocate(loc_source(0:niftot, 1:source_size), &
                      source=opt_source(0:niftot, 1:source_size))
!             loc_source => opt_source

        else
            source_size = TotWalkers
            allocate(loc_source(0:niftot, 1:source_size), &
                      source=CurrentDets(0:niftot, 1:source_size))
!             loc_source => CurrentDets
        end if

        def_default(t_use_fast_pops_core, t_opt_fast_core, .false.)

        n_pops_keep = target_space_size

        ! Quickly loop through and find the number of determinants with
        ! zero sign.
        nzero_dets = 0
        do i = 1, source_size
            call extract_sign(loc_source(:, i), real_sign)
            if (core_space_weight(real_sign, run) < 1.e-8_dp) &
                nzero_dets = nzero_dets + 1
        end do

        if (tApproxSpace .and. nProcessors > nApproxSpace) then
            ! Look at nApproxSpace  times the number of states that we expect to keep on
            ! this process. This is done instead of sending the best
            ! target_space_size states to all processes, which is often
            ! overkill and uses up too much memory.
            max_size = ceiling(real(nApproxSpace * n_pops_keep) / real(nProcessors), MPIArg)
        else
            max_size = int(n_pops_keep, MPIArg)
        end if

        length_this_proc = min(max_size, int(source_size - nzero_dets, MPIArg))

        call MPIAllGather(length_this_proc, lengths, ierr)
        total_length = sum(lengths)
        if (total_length < n_pops_keep) then
            call warning_neci(this_routine, "The number of states in the walker list is less &
                                   &than the number you requested. All states &
                                   &will be used.")
            n_pops_keep = total_length
        end if

        ! Allocate necessary arrays and log the memory used.
        allocate(amps_this_proc(length_this_proc), stat=ierr)
        call LogMemAlloc("amps_this_proc", int(length_this_proc), 8, this_routine, TagA, ierr)
        allocate(amps_all_procs(total_length), stat=ierr)
        call LogMemAlloc("amps_all_procs", int(total_length), 8, this_routine, TagB, ierr)
        allocate(indices_to_keep(n_pops_keep), stat=ierr)
        call LogMemAlloc("indices_to_keep", n_pops_keep, sizeof_int, this_routine, TagC, ierr)
        allocate(largest_states(0:NIfTot, length_this_proc), stat=ierr)
        call LogMemAlloc("largest_states", int(length_this_proc) * (NIfTot + 1), &
                         size_n_int, this_routine, TagD, ierr)

        disps(0) = 0_MPIArg
        do i = 1, nProcessors - 1
            disps(i) = disps(i - 1) + lengths(i - 1)
        end do

        ! Return the most populated states in source on *this* processor.
        call proc_most_populated_states(int(length_this_proc), run, largest_states)

        do i = 1, length_this_proc
            ! Store the real amplitudes in their real form.
            call extract_sign(largest_states(:, i), real_sign)
            ! We are interested in the absolute values of the ampltiudes.
            amps_this_proc(i) = core_space_weight(real_sign, run)
        end do

        if (t_use_fast_pops_core) then
            if (length_this_proc > 0) then
                min_sign = amps_this_proc(1)
                max_sign = amps_this_proc(ubound(amps_this_proc, dim=1))
            else
                min_sign = 0.0
                max_sign = 0.0
            end if

            ! Make the max/min values available on all procs
            call MPIAllGather(min_sign, min_vals, ierr)
            call MPIAllGather(max_sign, max_vals, ierr)

            call return_proc_share(n_pops_keep, min_vals, max_vals, int(lengths), &
                                   amps_this_proc, n_states_this_proc)
        else

            ! Now we want to combine all the most populated states from each processor to find
            ! how many states to keep from each processor.
            ! Take the top length_this_proc states from each processor.
            call MPIAllGatherV(amps_this_proc(1:length_this_proc), amps_all_procs(1:total_length), lengths, disps)
            ! This routine returns indices_to_keep, which will store the indices in amps_all_procs
            ! of those amplitudes which are among the n_pops_keep largest (but not sorted).
            call return_largest_indices(n_pops_keep, int(total_length), amps_all_procs, indices_to_keep)

            n_states_this_proc = 0
            do i = 1, n_pops_keep

                ind = indices_to_keep(i)

                ! Find the processor label for this state. The states of the ampltidues in
                ! amps_all_procs are together in blocks, in order of the corresponding processor
                ! label. Hence, if a state has index <= lengths(0) then it is on processor 0.
                do j = 0, nProcessors - 1
                    ind = ind - lengths(j)
                    if (ind <= 0) then
                        ! j now gives the processor label. If the state is on *this* processor,
                        ! update n_states_this_proc.
                        if (j == iProcIndex) n_states_this_proc = n_states_this_proc + 1
                        exit
                    end if
                end do

            end do
        end if

        ! Add the states to the ilut_list array.
        temp_ilut = 0_n_int
        core_sum = 0.0_dp
        do i = 1, n_states_this_proc
            ! The states in largest_states are sorted from smallest to largest.
            temp_ilut(0:NIfTot) = largest_states(0:NIfTot, length_this_proc - i + 1)
            call add_state_to_space(temp_ilut, ilut_list, space_size)
            core_sum = core_sum + amps_this_proc(length_this_proc - i + 1)
        end do
        call MPISum(core_sum, all_core_sum)
        write(stdout, *) "Total core population", all_core_sum
        deallocate(amps_this_proc)
        call LogMemDealloc(this_routine, TagA, ierr)
        deallocate(amps_all_procs)
        call LogMemDealloc(this_routine, TagB, ierr)
        deallocate(indices_to_keep)
        call LogMemDealloc(this_routine, TagC, ierr)
        deallocate(largest_states)
        call LogMemDealloc(this_routine, TagD, ierr)

    end subroutine generate_space_most_populated