subroutine communicate_estimates(iter_data, tot_parts_new, tot_parts_new_all, t_output)
! This routine sums all estimators and stats over all processes.
! We want this to be done in as few MPI calls as possible. Therefore, all
! quantities are first placed into one of two arrays. There is one array
! for HElement_t(dp) estimates, and another array (real(dp)) for all other
! kinds and types. A single MPISumAll is then performed for both of these
! combined arrays. After communication, the summed results are copied to
! the appropriate final arrays, with the type and kind corrected when
! necessary.
! There are also a few separate MPI calls which reduce using MPI_MAX and
! MPI_MIN at the end.
! -- *IMPORTANT FOR DEVELOPERS* ---------------------------------------
! To add a new quantity to be communicated, you must give it a new entry
! in send_arr or send_arr_helem array. It is hopefully clear how to do
! this by analogy. You should also update the indices in the appropriate
! stop_all, so that it can be checked if enough memory has been assigned.
use adi_data, only: nCoherentDoubles, nIncoherentDets, nConnection, &
AllCoherentDoubles, AllIncoherentDets, AllConnection
type(fcimc_iter_data) :: iter_data
real(dp), intent(in) :: tot_parts_new(lenof_sign)
real(dp), intent(out) :: tot_parts_new_all(lenof_sign)
! RT_M_Merge: Added real-time statistics for the newer communication scheme
integer, parameter :: real_arr_size = 2000
integer, parameter :: hel_arr_size = 200
! RT_M_Merge: Doubled all array sizes since there are now two
! copies of most of the variables (necessary?)
! Allow room to send up to 1000 (2000 for rt) elements.
real(dp) :: send_arr(real_arr_size)
! Allow room to receive up to 2000 elements.
real(dp) :: recv_arr(real_arr_size)
logical, intent(in) :: t_output
logical :: t_comm_trial
! Equivalent arrays for HElement_t variables.
integer, parameter :: arr_helem_size = 300
HElement_t(dp) :: send_arr_helem(arr_helem_size)
HElement_t(dp) :: recv_arr_helem(arr_helem_size)
! Equivalent arrays for EXLEVELStats (of exactly required size).
real(dp) :: send_arr_WNorm(3 * (NEl + 1) * inum_runs), &
recv_arr_WNorm(3 * (NEl + 1) * inum_runs)
! Allow room for 100 different arrays to be communicated.
integer, parameter :: size_arr_size = 100
integer :: sizes(size_arr_size)
integer :: low, upp, run
integer(int64) :: TotWalkersTemp
! [W.D.12.12.2017]
! allow for triples now:
! Todo: make that more flexible in the future!
real(dp) :: bloom_sz_tmp(0:3)
real(dp) :: RealAllHFCyc(max(lenof_sign, inum_runs))
real(dp) :: RealAllHFOut(max(lenof_sign, inum_runs))
real(dp) :: all_norm_semistoch_squared(inum_runs)
integer :: NoArrs
character(len=*), parameter :: t_r = 'communicate_estimates'
integer :: cnt
! Remove the holes in the main list when wanting the number of uniquely
! occupied determinants.
TotWalkersTemp = TotWalkers - HolesInList
! The trial wavefunction is communicated before output and only if the option is on
t_comm_trial = t_output .and. tTrialWavefunction
sizes = 0
! low will represent the lower bound of an array slice.
low = 0
! upp will represent the upper bound of an array slice.
upp = 0
sizes(1) = size(SpawnFromSing)
sizes(2) = size(iter_data%update_growth)
sizes(3) = size(NoBorn)
sizes(4) = size(NoDied)
sizes(5) = size(HFCyc)
sizes(6) = size(NoAtDoubs)
sizes(7) = size(Annihilated)
if (tTruncInitiator) then
sizes(8) = size(NoAddedInitiators)
sizes(9) = size(NoInitDets)
sizes(10) = size(NoNonInitDets)
sizes(11) = size(NoExtraInitDoubs)
sizes(12) = size(InitRemoved)
sizes(13) = size(NoAborted)
sizes(14) = size(NoRemoved)
sizes(15) = size(NoNonInitWalk)
sizes(16) = size(NoInitWalk)
end if
sizes(17) = 1 ! TotWalkersTemp (single int, not an array)
sizes(18) = size(norm_psi_squared)
sizes(19) = size(norm_semistoch_squared)
sizes(20) = size(TotParts)
sizes(21) = size(tot_parts_new)
sizes(22) = size(SumNoAtHF)
sizes(23) = size(bloom_count)
sizes(24) = size(NoAtHF)
sizes(25) = size(SumWalkersCyc)
sizes(26) = 1 ! nspawned (single int, not an array)
sizes(27) = 1 ! inst_double_occ
if (tTruncInitiator) sizes(28) = 1 ! doubleSpawns
! communicate the coherence numbers for SI
sizes(29) = 1
sizes(30) = 1
! Perturbation correction
sizes(31) = 1
! communicate the instant spin diff.. although i am not sure if this
! gets too big..
if (t_spin_measurements) then
sizes(32) = nBasis / 2
sizes(33) = nBasis / 2
end if
! truncated weight
sizes(34) = 1
! inits per ex lvl
sizes(35) = size(initsPerExLvl)
! number of successful/invalid excits
sizes(36) = 1
sizes(37) = 1
if (t_measure_local_spin) then
sizes(38) = nBasis / 2
end if
! en pert space size
if (tEN2) sizes(39) = 1
! Output variable
if (t_output) then
sizes(40) = size(HFOut)
sizes(41) = size(Acceptances)
sizes(42) = size(SumWalkersOut)
sizes(43) = 1
end if
if (t_real_time_fciqmc) then
sizes(44) = size(popSnapShot)
NoArrs = 44
else
NoArrs = 43
end if
send_arr = 0.0_dp
if (sum(sizes(1:NoArrs)) > real_arr_size) call stop_all(t_r, &
"No space left in arrays for communication of estimates. Please increase &
& the size of the send_arr and recv_arr arrays in the source code.")
low = upp + 1; upp = low + sizes(1) - 1; send_arr(low:upp) = SpawnFromSing;
low = upp + 1; upp = low + sizes(2) - 1; send_arr(low:upp) = iter_data%update_growth;
low = upp + 1; upp = low + sizes(3) - 1; send_arr(low:upp) = NoBorn;
low = upp + 1; upp = low + sizes(4) - 1; send_arr(low:upp) = NoDied;
low = upp + 1; upp = low + sizes(5) - 1; send_arr(low:upp) = HFCyc;
low = upp + 1; upp = low + sizes(6) - 1; send_arr(low:upp) = NoAtDoubs;
low = upp + 1; upp = low + sizes(7) - 1; send_arr(low:upp) = Annihilated;
if (tTruncInitiator) then
low = upp + 1; upp = low + sizes(8) - 1; send_arr(low:upp) = NoAddedInitiators;
low = upp + 1; upp = low + sizes(9) - 1; send_arr(low:upp) = NoInitDets;
low = upp + 1; upp = low + sizes(10) - 1; send_arr(low:upp) = NoNonInitDets;
low = upp + 1; upp = low + sizes(11) - 1; send_arr(low:upp) = NoExtraInitDoubs;
low = upp + 1; upp = low + sizes(12) - 1; send_arr(low:upp) = InitRemoved;
low = upp + 1; upp = low + sizes(13) - 1; send_arr(low:upp) = NoAborted;
low = upp + 1; upp = low + sizes(14) - 1; send_arr(low:upp) = NoRemoved;
low = upp + 1; upp = low + sizes(15) - 1; send_arr(low:upp) = NoNonInitWalk;
low = upp + 1; upp = low + sizes(16) - 1; send_arr(low:upp) = NoInitWalk;
end if
low = upp + 1; upp = low + sizes(17) - 1; send_arr(low:upp) = TotWalkersTemp;
low = upp + 1; upp = low + sizes(18) - 1; send_arr(low:upp) = norm_psi_squared;
low = upp + 1; upp = low + sizes(19) - 1; send_arr(low:upp) = norm_semistoch_squared;
low = upp + 1; upp = low + sizes(20) - 1; send_arr(low:upp) = TotParts;
low = upp + 1; upp = low + sizes(21) - 1; send_arr(low:upp) = tot_parts_new;
low = upp + 1; upp = low + sizes(22) - 1; send_arr(low:upp) = SumNoAtHf;
low = upp + 1; upp = low + sizes(23) - 1; send_arr(low:upp) = bloom_count;
low = upp + 1; upp = low + sizes(24) - 1; send_arr(low:upp) = NoAtHF;
low = upp + 1; upp = low + sizes(25) - 1; send_arr(low:upp) = SumWalkersCyc;
low = upp + 1; upp = low + sizes(26) - 1; send_arr(low:upp) = nspawned;
! double occ change:
low = upp + 1; upp = low + sizes(27) - 1; send_arr(low:upp) = inst_double_occ
if (tTruncInitiator) then
low = upp + 1; upp = low + sizes(28) - 1; send_arr(low:upp) = doubleSpawns;
end if
low = upp + 1; upp = low + sizes(29) - 1; send_arr(low:upp) = nCoherentDoubles
low = upp + 1; upp = low + sizes(30) - 1; send_arr(low:upp) = nIncoherentDets
low = upp + 1; upp = low + sizes(31) - 1; send_arr(low:upp) = nConnection
if (t_spin_measurements) then
low = upp + 1; upp = low + sizes(32) - 1; send_arr(low:upp) = inst_spin_diff
low = upp + 1; upp = low + sizes(33) - 1; send_arr(low:upp) = inst_spatial_doub_occ
end if
! truncated weight
low = upp + 1; upp = low + sizes(34) - 1; send_arr(low:upp) = truncatedWeight;
! initiators per excitation level
low = upp + 1; upp = low + sizes(35) - 1; send_arr(low:upp) = initsPerExLvl;
! excitation number trackers
low = upp + 1; upp = low + sizes(36) - 1; send_arr(low:upp) = nInvalidExcits;
low = upp + 1; upp = low + sizes(37) - 1; send_arr(low:upp) = nValidExcits;
! local spin
if (t_measure_local_spin) then
low = upp + 1; upp = low + sizes(38) - 1; send_arr(low:upp) = inst_local_spin;
end if
! en pert space size
if (tEN2) then
low = upp + 1; upp = low + sizes(39) - 1; send_arr(low:upp) = en_pert_main%ndets;
end if
if (t_output) then
low = upp + 1; upp = low + sizes(40) - 1; send_arr(low:upp) = HFOut
low = upp + 1; upp = low + sizes(41) - 1; send_arr(low:upp) = Acceptances
low = upp + 1; upp = low + sizes(42) - 1; send_arr(low:upp) = SumWalkersOut
low = upp + 1; upp = low + sizes(43) - 1; send_arr(low:upp) = n_core_non_init
if (t_real_time_fciqmc) then
low = upp + 1; upp = low + sizes(44) - 1; send_arr(low:upp) = popSnapShot;
end if
end if
! Perform the communication.
call MPISumAll(send_arr(1:upp), recv_arr(1:upp))
! Now we just need each result to be extracted to the correct array, with
! the correct type.
low = 0; upp = 0
low = upp + 1; upp = low + sizes(1) - 1; AllSpawnFromSing = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(2) - 1; iter_data%update_growth_tot = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(3) - 1; AllNoBorn = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(4) - 1; AllNoDied = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(5) - 1; RealAllHFCyc = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(6) - 1; AllNoAtDoubs = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(7) - 1; AllAnnihilated = recv_arr(low:upp);
if (tTruncInitiator) then
low = upp + 1; upp = low + sizes(8) - 1; AllNoAddedInitiators = nint(recv_arr(low:upp), int64);
low = upp + 1; upp = low + sizes(9) - 1; AllNoInitDets = nint(recv_arr(low:upp), int64);
low = upp + 1; upp = low + sizes(10) - 1; AllNoNonInitDets = nint(recv_arr(low:upp), int64);
low = upp + 1; upp = low + sizes(11) - 1; AllNoExtraInitDoubs = nint(recv_arr(low:upp), int64);
low = upp + 1; upp = low + sizes(12) - 1; AllInitRemoved = nint(recv_arr(low:upp), int64);
low = upp + 1; upp = low + sizes(13) - 1; AllNoAborted = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(14) - 1; AllNoRemoved = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(15) - 1; AllNoNonInitWalk = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(16) - 1; AllNoInitWalk = recv_arr(low:upp);
end if
low = upp + 1; upp = low + sizes(17) - 1; AllTotWalkers = nint(recv_arr(low), int64);
low = upp + 1; upp = low + sizes(18) - 1; all_norm_psi_squared = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(19) - 1; all_norm_semistoch_squared = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(20) - 1; AllTotParts = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(21) - 1; tot_parts_new_all = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(22) - 1; AllSumNoAtHF = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(23) - 1; all_bloom_count = nint(recv_arr(low:upp));
low = upp + 1; upp = low + sizes(24) - 1; AllNoAtHf = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(25) - 1; AllSumWalkersCyc = recv_arr(low:upp);
low = upp + 1; upp = low + sizes(26) - 1; nspawned_tot = nint(recv_arr(low), int64);
! double occ:
low = upp + 1; upp = low + sizes(27) - 1; all_inst_double_occ = recv_arr(low);
if (tTruncInitiator) then
low = upp + 1; upp = low + sizes(28) - 1; allDoubleSpawns = nint(recv_arr(low));
doubleSpawns = 0
end if
low = upp + 1; upp = low + sizes(29) - 1; AllCoherentDoubles = nint(recv_arr(low));
low = upp + 1; upp = low + sizes(30) - 1; AllIncoherentDets = nint(recv_arr(low));
low = upp + 1; upp = low + sizes(31) - 1; AllConnection = nint(recv_arr(low));
if (t_spin_measurements) then
low = upp + 1; upp = low + sizes(32) - 1; all_inst_spin_diff = recv_arr(low:upp)
low = upp + 1; upp = low + sizes(33) - 1; all_inst_spatial_doub_occ = recv_arr(low:upp)
end if
! truncated weight
low = upp + 1; upp = low + sizes(34) - 1; AllTruncatedWeight = recv_arr(low);
! initiators per excitation level
low = upp + 1; upp = low + sizes(35) - 1; AllInitsPerExLvl = nint(recv_arr(low:upp));
! excitation number trackers
low = upp + 1; upp = low + sizes(36) - 1; allNInvalidExcits = nint(recv_arr(low), int64);
low = upp + 1; upp = low + sizes(37) - 1; allNValidExcits = nint(recv_arr(low), int64);
! local spin
if (t_measure_local_spin) then
low = upp + 1; upp = low + sizes(38) - 1; all_local_spin = recv_arr(low:upp);
end if
! en_pert space size
if (tEN2) then
low = upp + 1; upp = low + sizes(39) - 1; en_pert_main%ndets_all = nint(recv_arr(low));
end if
! Output variables
if (t_output) then
low = upp + 1; upp = low + sizes(40) - 1; RealAllHFOut = recv_arr(low:upp)
low = upp + 1; upp = low + sizes(41) - 1; AllAcceptances = recv_arr(low:upp)
low = upp + 1; upp = low + sizes(42) - 1; AllSumWalkersOut = recv_arr(low:upp)
low = upp + 1; upp = low + sizes(43) - 1; all_n_core_non_init = nint(recv_arr(low))
if (t_real_time_fciqmc) then
low = upp + 1; upp = low + sizes(44) - 1; allPopSnapShot = recv_arr(low:upp);
end if
end if
! Communicate HElement_t variables:
low = 0; upp = 0;
sizes(1) = size(ENumCyc)
sizes(2) = size(SumENum)
sizes(3) = size(ENumCycAbs)
sizes(4) = size(cyc_proje_denominator)
sizes(5) = size(sum_proje_denominator)
if (t_comm_trial) then
sizes(6) = size(trial_numerator)
sizes(7) = size(trial_denom)
sizes(8) = size(trial_num_inst)
sizes(9) = size(trial_denom_inst)
sizes(10) = size(init_trial_numerator)
sizes(11) = size(init_trial_denom)
end if
sizes(12) = size(InitsEnumCyc)
sizes(13) = size(ENumOut)
if (sum(sizes(1:14)) > arr_helem_size) call stop_all(t_r, "No space left in arrays for communication of estimates. Please &
& increase the size of the send_arr_helem and recv_arr_helem &
& arrays in the source code.")
low = upp + 1; upp = low + sizes(1) - 1; send_arr_helem(low:upp) = ENumCyc;
low = upp + 1; upp = low + sizes(2) - 1; send_arr_helem(low:upp) = SumENum;
low = upp + 1; upp = low + sizes(3) - 1; send_arr_helem(low:upp) = ENumCycAbs;
low = upp + 1; upp = low + sizes(4) - 1; send_arr_helem(low:upp) = cyc_proje_denominator;
low = upp + 1; upp = low + sizes(5) - 1; send_arr_helem(low:upp) = sum_proje_denominator;
if (t_comm_trial) then
low = upp + 1; upp = low + sizes(6) - 1; send_arr_helem(low:upp) = trial_numerator;
low = upp + 1; upp = low + sizes(7) - 1; send_arr_helem(low:upp) = trial_denom;
low = upp + 1; upp = low + sizes(8) - 1; send_arr_helem(low:upp) = trial_num_inst;
low = upp + 1; upp = low + sizes(9) - 1; send_arr_helem(low:upp) = trial_denom_inst;
low = upp + 1; upp = low + sizes(10) - 1; send_arr_helem(low:upp) = init_trial_numerator;
low = upp + 1; upp = low + sizes(11) - 1; send_arr_helem(low:upp) = init_trial_denom;
end if
low = upp + 1; upp = low + sizes(12) - 1; send_arr_helem(low:upp) = InitsENumCyc;
if (t_output) then
low = upp + 1; upp = low + sizes(13) - 1; send_arr_helem(low:upp) = ENumOut;
end if
call MPISumAll(send_arr_helem(1:upp), recv_arr_helem(1:upp))
low = 0; upp = 0;
low = upp + 1; upp = low + sizes(1) - 1; AllENumCyc = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(2) - 1; AllSumENum = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(3) - 1; AllENumCycAbs = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(4) - 1; all_cyc_proje_denominator = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(5) - 1; all_sum_proje_denominator = recv_arr_helem(low:upp);
if (t_comm_trial) then
low = upp + 1; upp = low + sizes(6) - 1; tot_trial_numerator = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(7) - 1; tot_trial_denom = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(8) - 1; tot_trial_num_inst = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(9) - 1; tot_trial_denom_inst = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(10) - 1; tot_init_trial_numerator = recv_arr_helem(low:upp);
low = upp + 1; upp = low + sizes(11) - 1; tot_init_trial_denom = recv_arr_helem(low:upp);
end if
low = upp + 1; upp = low + sizes(12) - 1; AllInitsENumCyc = recv_arr_helem(low:upp);
if (t_output) then
low = upp + 1; upp = low + sizes(13) - 1; AllEnumOut = recv_arr_helem(low:upp);
end if
! Optionally communicate EXLEVEL_WNorm.
if (tLogEXLEVELStats) then
upp = size(EXLEVEL_WNorm)
send_arr_WNorm(1:upp) = reshape(EXLEVEL_WNorm, (/upp/))
call MPISumAll(send_arr_WNorm(1:upp), recv_arr_WNorm(1:upp))
AllEXLEVEL_WNorm = reshape(recv_arr_WNorm(1:upp), &
shape(AllEXLEVEL_WNorm))
! Apply square root for L2 norm.
AllEXLEVEL_WNorm(2, :, :) = sqrt(AllEXLEVEL_WNorm(2, :, :))
end if
! Do some processing of the received data.
! Convert real array into HElement_t one.
do run = 1, inum_runs
AllHFCyc(run) = ARR_RE_OR_CPLX(RealAllHFCyc, run)
AllHFOut(run) = ARR_RE_OR_CPLX(RealAllHFOut, run)
end do
#ifdef CMPLX_
norm_psi = sqrt(sum(all_norm_psi_squared))
norm_semistoch = sqrt(sum(all_norm_semistoch_squared))
#else
norm_psi = sqrt(all_norm_psi_squared)
norm_semistoch = sqrt(all_norm_semistoch_squared)
#endif
! These require a different type of reduce operation, so are communicated
! separately to the above communication.
call MPIAllReduce(bloom_sizes(1:2), MPI_MAX, bloom_sz_tmp(1:2))
bloom_sizes(1:2) = bloom_sz_tmp(1:2)
! Arrays for checking load balancing.
call MPIReduce(TotWalkersTemp, MPI_MAX, MaxWalkersProc)
call MPIReduce(TotWalkersTemp, MPI_MIN, MinWalkersProc)
call MPIReduce(max_cyc_spawn, MPI_MAX, all_max_cyc_spawn)
end subroutine communicate_estimates