communicate_estimates Subroutine

public subroutine communicate_estimates(iter_data, tot_parts_new, tot_parts_new_all, t_output)

Uses

Arguments

Type IntentOptional Attributes Name
type(fcimc_iter_data) :: iter_data
real(kind=dp), intent(in) :: tot_parts_new(lenof_sign)
real(kind=dp), intent(out) :: tot_parts_new_all(lenof_sign)
logical, intent(in) :: t_output

Contents

Source Code


Source Code

    subroutine communicate_estimates(iter_data, tot_parts_new, tot_parts_new_all, t_output)

        ! This routine sums all estimators and stats over all processes.

        ! We want this to be done in as few MPI calls as possible. Therefore, all
        ! quantities are first placed into one of two arrays. There is one array
        ! for HElement_t(dp) estimates, and another array (real(dp)) for all other
        ! kinds and types. A single MPISumAll is then performed for both of these
        ! combined arrays. After communication, the summed results are copied to
        ! the appropriate final arrays, with the type and kind corrected when
        ! necessary.

        ! There are also a few separate MPI calls which reduce using MPI_MAX and
        ! MPI_MIN at the end.

        ! -- *IMPORTANT FOR DEVELOPERS* ---------------------------------------
        ! To add a new quantity to be communicated, you must give it a new entry
        ! in send_arr or send_arr_helem array. It is hopefully clear how to do
        ! this by analogy. You should also update the indices in the appropriate
        ! stop_all, so that it can be checked if enough memory has been assigned.
        use adi_data, only: nCoherentDoubles, nIncoherentDets, nConnection, &
                            AllCoherentDoubles, AllIncoherentDets, AllConnection
        type(fcimc_iter_data) :: iter_data
        real(dp), intent(in) :: tot_parts_new(lenof_sign)
        real(dp), intent(out) :: tot_parts_new_all(lenof_sign)

        ! RT_M_Merge: Added real-time statistics for the newer communication scheme
        integer, parameter :: real_arr_size = 2000
        integer, parameter :: hel_arr_size = 200
        ! RT_M_Merge: Doubled all array sizes since there are now two
        ! copies of most of the variables (necessary?)

        ! Allow room to send up to 1000 (2000 for rt) elements.
        real(dp) :: send_arr(real_arr_size)
        ! Allow room to receive up to 2000 elements.
        real(dp) :: recv_arr(real_arr_size)
        logical, intent(in) :: t_output
        logical :: t_comm_trial
        ! Equivalent arrays for HElement_t variables.
        integer, parameter :: arr_helem_size = 300
        HElement_t(dp) :: send_arr_helem(arr_helem_size)
        HElement_t(dp) :: recv_arr_helem(arr_helem_size)
        ! Equivalent arrays for EXLEVELStats (of exactly required size).
        real(dp) :: send_arr_WNorm(3 * (NEl + 1) * inum_runs), &
                    recv_arr_WNorm(3 * (NEl + 1) * inum_runs)
        ! Allow room for 100 different arrays to be communicated.
        integer, parameter :: size_arr_size = 100
        integer :: sizes(size_arr_size)
        integer :: low, upp, run

        integer(int64) :: TotWalkersTemp
        ! [W.D.12.12.2017]
        ! allow for triples now:
        ! Todo: make that more flexible in the future!
        real(dp) :: bloom_sz_tmp(0:3)
        real(dp) :: RealAllHFCyc(max(lenof_sign, inum_runs))
        real(dp) :: RealAllHFOut(max(lenof_sign, inum_runs))
        real(dp) :: all_norm_semistoch_squared(inum_runs)
        integer :: NoArrs
        character(len=*), parameter :: t_r = 'communicate_estimates'
        integer :: cnt

        ! Remove the holes in the main list when wanting the number of uniquely
        ! occupied determinants.
        TotWalkersTemp = TotWalkers - HolesInList

        ! The trial wavefunction is communicated before output and only if the option is on
        t_comm_trial = t_output .and. tTrialWavefunction

        sizes = 0

        ! low will represent the lower bound of an array slice.
        low = 0
        ! upp will represent the upper bound of an array slice.
        upp = 0

        sizes(1) = size(SpawnFromSing)
        sizes(2) = size(iter_data%update_growth)
        sizes(3) = size(NoBorn)
        sizes(4) = size(NoDied)
        sizes(5) = size(HFCyc)
        sizes(6) = size(NoAtDoubs)
        sizes(7) = size(Annihilated)
        if (tTruncInitiator) then
            sizes(8) = size(NoAddedInitiators)
            sizes(9) = size(NoInitDets)
            sizes(10) = size(NoNonInitDets)
            sizes(11) = size(NoExtraInitDoubs)
            sizes(12) = size(InitRemoved)
            sizes(13) = size(NoAborted)
            sizes(14) = size(NoRemoved)
            sizes(15) = size(NoNonInitWalk)
            sizes(16) = size(NoInitWalk)
        end if
        sizes(17) = 1 ! TotWalkersTemp (single int, not an array)
        sizes(18) = size(norm_psi_squared)
        sizes(19) = size(norm_semistoch_squared)
        sizes(20) = size(TotParts)
        sizes(21) = size(tot_parts_new)
        sizes(22) = size(SumNoAtHF)
        sizes(23) = size(bloom_count)
        sizes(24) = size(NoAtHF)
        sizes(25) = size(SumWalkersCyc)
        sizes(26) = 1 ! nspawned (single int, not an array)

        sizes(27) = 1 ! inst_double_occ
        if (tTruncInitiator) sizes(28) = 1 ! doubleSpawns
        ! communicate the coherence numbers for SI
        sizes(29) = 1

        sizes(30) = 1
        ! Perturbation correction
        sizes(31) = 1

        ! communicate the instant spin diff.. although i am not sure if this
        ! gets too big..
        if (t_spin_measurements) then
            sizes(32) = nBasis / 2
            sizes(33) = nBasis / 2
        end if
        ! truncated weight
        sizes(34) = 1
        ! inits per ex lvl
        sizes(35) = size(initsPerExLvl)
        ! number of successful/invalid excits
        sizes(36) = 1
        sizes(37) = 1

        if (t_measure_local_spin) then
            sizes(38) = nBasis / 2
        end if
        ! en pert space size
        if (tEN2) sizes(39) = 1
        ! Output variable
        if (t_output) then
            sizes(40) = size(HFOut)
            sizes(41) = size(Acceptances)
            sizes(42) = size(SumWalkersOut)
            sizes(43) = 1
        end if
        if (t_real_time_fciqmc) then
            sizes(44) = size(popSnapShot)
            NoArrs = 44
        else
            NoArrs = 43
        end if

        send_arr = 0.0_dp

        if (sum(sizes(1:NoArrs)) > real_arr_size) call stop_all(t_r, &
             "No space left in arrays for communication of estimates. Please increase &
             & the size of the send_arr and recv_arr arrays in the source code.")

        low = upp + 1; upp = low + sizes(1) - 1; send_arr(low:upp) = SpawnFromSing;
        low = upp + 1; upp = low + sizes(2) - 1; send_arr(low:upp) = iter_data%update_growth;
        low = upp + 1; upp = low + sizes(3) - 1; send_arr(low:upp) = NoBorn;
        low = upp + 1; upp = low + sizes(4) - 1; send_arr(low:upp) = NoDied;
        low = upp + 1; upp = low + sizes(5) - 1; send_arr(low:upp) = HFCyc;
        low = upp + 1; upp = low + sizes(6) - 1; send_arr(low:upp) = NoAtDoubs;
        low = upp + 1; upp = low + sizes(7) - 1; send_arr(low:upp) = Annihilated;
        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(8) - 1; send_arr(low:upp) = NoAddedInitiators;
            low = upp + 1; upp = low + sizes(9) - 1; send_arr(low:upp) = NoInitDets;
            low = upp + 1; upp = low + sizes(10) - 1; send_arr(low:upp) = NoNonInitDets;
            low = upp + 1; upp = low + sizes(11) - 1; send_arr(low:upp) = NoExtraInitDoubs;
            low = upp + 1; upp = low + sizes(12) - 1; send_arr(low:upp) = InitRemoved;
            low = upp + 1; upp = low + sizes(13) - 1; send_arr(low:upp) = NoAborted;
            low = upp + 1; upp = low + sizes(14) - 1; send_arr(low:upp) = NoRemoved;
            low = upp + 1; upp = low + sizes(15) - 1; send_arr(low:upp) = NoNonInitWalk;
            low = upp + 1; upp = low + sizes(16) - 1; send_arr(low:upp) = NoInitWalk;
        end if

        low = upp + 1; upp = low + sizes(17) - 1; send_arr(low:upp) = TotWalkersTemp;
        low = upp + 1; upp = low + sizes(18) - 1; send_arr(low:upp) = norm_psi_squared;
        low = upp + 1; upp = low + sizes(19) - 1; send_arr(low:upp) = norm_semistoch_squared;
        low = upp + 1; upp = low + sizes(20) - 1; send_arr(low:upp) = TotParts;
        low = upp + 1; upp = low + sizes(21) - 1; send_arr(low:upp) = tot_parts_new;
        low = upp + 1; upp = low + sizes(22) - 1; send_arr(low:upp) = SumNoAtHf;
        low = upp + 1; upp = low + sizes(23) - 1; send_arr(low:upp) = bloom_count;
        low = upp + 1; upp = low + sizes(24) - 1; send_arr(low:upp) = NoAtHF;
        low = upp + 1; upp = low + sizes(25) - 1; send_arr(low:upp) = SumWalkersCyc;
        low = upp + 1; upp = low + sizes(26) - 1; send_arr(low:upp) = nspawned;
        ! double occ change:
        low = upp + 1; upp = low + sizes(27) - 1; send_arr(low:upp) = inst_double_occ

        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(28) - 1; send_arr(low:upp) = doubleSpawns;
        end if
        low = upp + 1; upp = low + sizes(29) - 1; send_arr(low:upp) = nCoherentDoubles
        low = upp + 1; upp = low + sizes(30) - 1; send_arr(low:upp) = nIncoherentDets
        low = upp + 1; upp = low + sizes(31) - 1; send_arr(low:upp) = nConnection

        if (t_spin_measurements) then
            low = upp + 1; upp = low + sizes(32) - 1; send_arr(low:upp) = inst_spin_diff
            low = upp + 1; upp = low + sizes(33) - 1; send_arr(low:upp) = inst_spatial_doub_occ
        end if

        ! truncated weight
        low = upp + 1; upp = low + sizes(34) - 1; send_arr(low:upp) = truncatedWeight;
        ! initiators per excitation level
        low = upp + 1; upp = low + sizes(35) - 1; send_arr(low:upp) = initsPerExLvl;
        ! excitation number trackers

        low = upp + 1; upp = low + sizes(36) - 1; send_arr(low:upp) = nInvalidExcits;
        low = upp + 1; upp = low + sizes(37) - 1; send_arr(low:upp) = nValidExcits;
        ! local spin
        if (t_measure_local_spin) then
            low = upp + 1; upp = low + sizes(38) - 1; send_arr(low:upp) = inst_local_spin;
        end if

        ! en pert space size
        if (tEN2) then
            low = upp + 1; upp = low + sizes(39) - 1; send_arr(low:upp) = en_pert_main%ndets;
        end if

        if (t_output) then
            low = upp + 1; upp = low + sizes(40) - 1; send_arr(low:upp) = HFOut
            low = upp + 1; upp = low + sizes(41) - 1; send_arr(low:upp) = Acceptances
            low = upp + 1; upp = low + sizes(42) - 1; send_arr(low:upp) = SumWalkersOut
            low = upp + 1; upp = low + sizes(43) - 1; send_arr(low:upp) = n_core_non_init
            if (t_real_time_fciqmc) then
                low = upp + 1; upp = low + sizes(44) - 1; send_arr(low:upp) = popSnapShot;
            end if
        end if

        ! Perform the communication.
        call MPISumAll(send_arr(1:upp), recv_arr(1:upp))

        ! Now we just need each result to be extracted to the correct array, with
        ! the correct type.

        low = 0; upp = 0

        low = upp + 1; upp = low + sizes(1) - 1; AllSpawnFromSing = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(2) - 1; iter_data%update_growth_tot = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(3) - 1; AllNoBorn = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(4) - 1; AllNoDied = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(5) - 1; RealAllHFCyc = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(6) - 1; AllNoAtDoubs = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(7) - 1; AllAnnihilated = recv_arr(low:upp);
        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(8) - 1; AllNoAddedInitiators = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(9) - 1; AllNoInitDets = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(10) - 1; AllNoNonInitDets = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(11) - 1; AllNoExtraInitDoubs = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(12) - 1; AllInitRemoved = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(13) - 1; AllNoAborted = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(14) - 1; AllNoRemoved = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(15) - 1; AllNoNonInitWalk = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(16) - 1; AllNoInitWalk = recv_arr(low:upp);
        end if
        low = upp + 1; upp = low + sizes(17) - 1; AllTotWalkers = nint(recv_arr(low), int64);
        low = upp + 1; upp = low + sizes(18) - 1; all_norm_psi_squared = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(19) - 1; all_norm_semistoch_squared = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(20) - 1; AllTotParts = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(21) - 1; tot_parts_new_all = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(22) - 1; AllSumNoAtHF = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(23) - 1; all_bloom_count = nint(recv_arr(low:upp));
        low = upp + 1; upp = low + sizes(24) - 1; AllNoAtHf = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(25) - 1; AllSumWalkersCyc = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(26) - 1; nspawned_tot = nint(recv_arr(low), int64);
        ! double occ:
        low = upp + 1; upp = low + sizes(27) - 1; all_inst_double_occ = recv_arr(low);
        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(28) - 1; allDoubleSpawns = nint(recv_arr(low));
            doubleSpawns = 0
        end if
        low = upp + 1; upp = low + sizes(29) - 1; AllCoherentDoubles = nint(recv_arr(low));
        low = upp + 1; upp = low + sizes(30) - 1; AllIncoherentDets = nint(recv_arr(low));
        low = upp + 1; upp = low + sizes(31) - 1; AllConnection = nint(recv_arr(low));
        if (t_spin_measurements) then
            low = upp + 1; upp = low + sizes(32) - 1; all_inst_spin_diff = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(33) - 1; all_inst_spatial_doub_occ = recv_arr(low:upp)
        end if

        ! truncated weight
        low = upp + 1; upp = low + sizes(34) - 1; AllTruncatedWeight = recv_arr(low);
        ! initiators per excitation level
        low = upp + 1; upp = low + sizes(35) - 1; AllInitsPerExLvl = nint(recv_arr(low:upp));
        ! excitation number trackers
        low = upp + 1; upp = low + sizes(36) - 1; allNInvalidExcits = nint(recv_arr(low), int64);
        low = upp + 1; upp = low + sizes(37) - 1; allNValidExcits = nint(recv_arr(low), int64);

        ! local spin
        if (t_measure_local_spin) then
            low = upp + 1; upp = low + sizes(38) - 1; all_local_spin = recv_arr(low:upp);
        end if

        ! en_pert space size
        if (tEN2) then
            low = upp + 1; upp = low + sizes(39) - 1; en_pert_main%ndets_all = nint(recv_arr(low));
        end if
        ! Output variables
        if (t_output) then
            low = upp + 1; upp = low + sizes(40) - 1; RealAllHFOut = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(41) - 1; AllAcceptances = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(42) - 1; AllSumWalkersOut = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(43) - 1; all_n_core_non_init = nint(recv_arr(low))
            if (t_real_time_fciqmc) then
                low = upp + 1; upp = low + sizes(44) - 1; allPopSnapShot = recv_arr(low:upp);
            end if
        end if
        ! Communicate HElement_t variables:

        low = 0; upp = 0;
        sizes(1) = size(ENumCyc)
        sizes(2) = size(SumENum)
        sizes(3) = size(ENumCycAbs)
        sizes(4) = size(cyc_proje_denominator)
        sizes(5) = size(sum_proje_denominator)
        if (t_comm_trial) then
            sizes(6) = size(trial_numerator)
            sizes(7) = size(trial_denom)
            sizes(8) = size(trial_num_inst)
            sizes(9) = size(trial_denom_inst)
            sizes(10) = size(init_trial_numerator)
            sizes(11) = size(init_trial_denom)
        end if
        sizes(12) = size(InitsEnumCyc)
        sizes(13) = size(ENumOut)

        if (sum(sizes(1:14)) > arr_helem_size) call stop_all(t_r, "No space left in arrays for communication of estimates. Please &
                                                        & increase the size of the send_arr_helem and recv_arr_helem &
                                                        & arrays in the source code.")

        low = upp + 1; upp = low + sizes(1) - 1; send_arr_helem(low:upp) = ENumCyc;
        low = upp + 1; upp = low + sizes(2) - 1; send_arr_helem(low:upp) = SumENum;
        low = upp + 1; upp = low + sizes(3) - 1; send_arr_helem(low:upp) = ENumCycAbs;
        low = upp + 1; upp = low + sizes(4) - 1; send_arr_helem(low:upp) = cyc_proje_denominator;
        low = upp + 1; upp = low + sizes(5) - 1; send_arr_helem(low:upp) = sum_proje_denominator;
        if (t_comm_trial) then
            low = upp + 1; upp = low + sizes(6) - 1; send_arr_helem(low:upp) = trial_numerator;
            low = upp + 1; upp = low + sizes(7) - 1; send_arr_helem(low:upp) = trial_denom;
            low = upp + 1; upp = low + sizes(8) - 1; send_arr_helem(low:upp) = trial_num_inst;
            low = upp + 1; upp = low + sizes(9) - 1; send_arr_helem(low:upp) = trial_denom_inst;
            low = upp + 1; upp = low + sizes(10) - 1; send_arr_helem(low:upp) = init_trial_numerator;
            low = upp + 1; upp = low + sizes(11) - 1; send_arr_helem(low:upp) = init_trial_denom;
        end if
        low = upp + 1; upp = low + sizes(12) - 1; send_arr_helem(low:upp) = InitsENumCyc;
        if (t_output) then
            low = upp + 1; upp = low + sizes(13) - 1; send_arr_helem(low:upp) = ENumOut;
        end if

        call MPISumAll(send_arr_helem(1:upp), recv_arr_helem(1:upp))

        low = 0; upp = 0;
        low = upp + 1; upp = low + sizes(1) - 1; AllENumCyc = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(2) - 1; AllSumENum = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(3) - 1; AllENumCycAbs = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(4) - 1; all_cyc_proje_denominator = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(5) - 1; all_sum_proje_denominator = recv_arr_helem(low:upp);
        if (t_comm_trial) then
            low = upp + 1; upp = low + sizes(6) - 1; tot_trial_numerator = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(7) - 1; tot_trial_denom = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(8) - 1; tot_trial_num_inst = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(9) - 1; tot_trial_denom_inst = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(10) - 1; tot_init_trial_numerator = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(11) - 1; tot_init_trial_denom = recv_arr_helem(low:upp);
        end if
        low = upp + 1; upp = low + sizes(12) - 1; AllInitsENumCyc = recv_arr_helem(low:upp);
        if (t_output) then
            low = upp + 1; upp = low + sizes(13) - 1; AllEnumOut = recv_arr_helem(low:upp);
        end if

        ! Optionally communicate EXLEVEL_WNorm.
        if (tLogEXLEVELStats) then
            upp = size(EXLEVEL_WNorm)
            send_arr_WNorm(1:upp) = reshape(EXLEVEL_WNorm, (/upp/))
            call MPISumAll(send_arr_WNorm(1:upp), recv_arr_WNorm(1:upp))
            AllEXLEVEL_WNorm = reshape(recv_arr_WNorm(1:upp), &
                                       shape(AllEXLEVEL_WNorm))
            ! Apply square root for L2 norm.
            AllEXLEVEL_WNorm(2, :, :) = sqrt(AllEXLEVEL_WNorm(2, :, :))
        end if

        ! Do some processing of the received data.

        ! Convert real array into HElement_t one.
        do run = 1, inum_runs
            AllHFCyc(run) = ARR_RE_OR_CPLX(RealAllHFCyc, run)
            AllHFOut(run) = ARR_RE_OR_CPLX(RealAllHFOut, run)
        end do

#ifdef CMPLX_
        norm_psi = sqrt(sum(all_norm_psi_squared))
        norm_semistoch = sqrt(sum(all_norm_semistoch_squared))
#else
        norm_psi = sqrt(all_norm_psi_squared)
        norm_semistoch = sqrt(all_norm_semistoch_squared)
#endif

        ! These require a different type of reduce operation, so are communicated
        ! separately to the above communication.
        call MPIAllReduce(bloom_sizes(1:2), MPI_MAX, bloom_sz_tmp(1:2))
        bloom_sizes(1:2) = bloom_sz_tmp(1:2)

        ! Arrays for checking load balancing.
        call MPIReduce(TotWalkersTemp, MPI_MAX, MaxWalkersProc)
        call MPIReduce(TotWalkersTemp, MPI_MIN, MinWalkersProc)
        call MPIReduce(max_cyc_spawn, MPI_MAX, all_max_cyc_spawn)

    end subroutine communicate_estimates