fcimc_iter_utilities.F90 Source File


Source Code

#include "macros.h"

module fcimc_iter_utils

    use SystemData, only: nel, tHPHF, tNoBrillouin, tRef_Not_HF
    use CalcData, only: tSemiStochastic, tChangeProjEDet, tTrialWavefunction, &
                        tCheckHighestPopOnce, tRestartHighPop, StepsSft, &
                        tTruncInitiator, tJumpShift, TargetGrowRate, &
                        tLetInitialPopDie, InitWalkers, tCheckHighestPop, &
                        HFPopThresh, DiagSft, DiagSft2, tShiftOnHFPop, iRestartWalkNum, &
                        FracLargerDet, tKP_FCIQMC, MaxNoatHF, SftDamp, SftDamp2, &
                        nShiftEquilSteps, TargetGrowRateWalk, tContTimeFCIMC, &
                        tContTimeFull, pop_change_min, tPositiveHFSign, &
                        qmc_trial_wf, nEquilSteps, &
                        tSkipRef, N0_Target, tSpinProject, &
                        tFixedN0, tEN2, tTrialShift, tFixTrial, TrialTarget, &
                        tDynamicAvMCEx, AvMCExcits, tTargetShiftdamp, tInitShift, &
                        tNonInitModShift, TargetWalkers1, TargetWalkers2, DiagSft2Init, &
                        modShiftTargetIter, tmodShiftPhase, modShiftConstIters, tRenorm, tBosonNoSpawn, &
                        tModShiftTarget

    use tau_main, only: t_scale_tau_to_death, scale_tau_to_death_triggered, &
        tau_search_method, input_tau_search_method, possible_tau_search_methods, &
        tau_stop_method, end_of_search_reached, scale_tau_to_death, tau, &
        stop_tau_search
    use tau_search_hist, only: t_fill_frequency_hists

    use cont_time_rates, only: cont_spawn_success, cont_spawn_attempts
    use semi_stoch_procs, only: recalc_core_hamil_diag
    use bit_rep_data, only: NIfD, NIfTot, test_flag, test_flag_multi
    use hphf_integrals, only: hphf_diag_helement
    use Determinants, only: get_helement
    use LoggingData, only: tPrintDataTables, tLogEXLEVELStats, t_spin_measurements, &
        StepsPrint
    use LoggingData, only: tFCIMCStats2, t_calc_double_occ, t_calc_double_occ_av, &
                           AllInitsPerExLvl, initsPerExLvl, tCoupleCycleOutput, &
                           t_measure_local_spin, tRDMOnFly, tExplicitAllRDM
    use tau_search_conventional, only: update_tau
    use rdm_data, only: en_pert_main, InstRDMCorrectionFactor
    use Parallel_neci, only: MPIAllReduce, MPI_SUM, MPI_MAX, iProcIndex, root, &
        MPI_MIN, MPI_2INTEGER, MPIGather, MPIReduce, mpialllorlogical
    use fcimc_initialisation, only: DeallocFCIMCMemPar, SetupParameters, InitFCIMCCalcPar
    use fcimc_output, only: write_fcimcstats2, WriteFciMCStatsHeader, &
        WriteFCIMCStats
    use fcimc_helper
    use FciMCData
    use real_time_procs, only: normalize_gf_overlap
    use real_time_data, only: current_overlap, overlap_real, overlap_imag, &
                              t_real_time_fciqmc
    use real_time_data, only: allPopSnapshot, popSnapshot
    use double_occ_mod, only: inst_double_occ, all_inst_double_occ, sum_double_occ, &
                              sum_norm_psi_squared, all_inst_spatial_doub_occ, &
                              rezero_double_occ_stats, rezero_spin_diff, &
                              all_inst_spin_diff, inst_spin_diff, inst_spatial_doub_occ

    use tau_search_hist, only: update_tau_hist

    use local_spin, only: all_local_spin, inst_local_spin, rezero_local_spin_stats

    use PopsfileMod, only: ChangeRefDet

    better_implicit_none
    private
    public :: output_diagnostics, update_iter_data, calculate_new_shift_wrapper, &
        collate_iter_data, iter_diagnostics, population_check, &
        update_shift, iteration_output_wrapper, fix_trial_overlap

contains

    subroutine output_diagnostics()
        ! Updates Time measures and gets the acceptance rate printed in output

        ! Update the total imaginary time passed
        TotImagTime = TotImagTime + StepsPrint * Tau

        ! Set Iter time to equal the average time per iteration in the
        ! previous update cycle.
        IterTime = IterTime / real(StepsPrint, sp)

        ! Do the same averaging for allNValidExcits and allNInvalidExcits
        allNValidExcits = nint(real(allNValidExcits, dp) / real(StepsPrint, dp), int64)
        allNInvalidExcits = nint(real(allNInvalidExcits, dp) / real(StepsPrint, dp), int64)

        ! Calculate the acceptance ratio
        if (tContTimeFCIMC .and. .not. tContTimeFull) then
            if (.not. near_zero(real(cont_spawn_attempts))) then
                AccRat = real(cont_spawn_success) / real(cont_spawn_attempts)
            else
                AccRat = 0.0_dp
            end if
        else
            if (.not. any(near_zero(AllSumWalkersOut))) then
                AccRat = real(AllAcceptances, dp) / AllSumWalkersOut
            else
                AccRat = 0.0_dp
            end if
        end if
    end subroutine output_diagnostics

    ! TODO: COMMENTING
    subroutine iter_diagnostics()

        character(*), parameter :: this_routine = 'iter_diagnostics'
        character(*), parameter :: t_r = this_routine
        integer :: run, part_type

#ifndef CMPLX_
        if (tPositiveHFSign) then
            do part_type = 1, lenof_sign
                if ((.not. tFillingStochRDMonFly) .or. (inum_runs == 1)) then
                    if (AllNoAtHF(part_type) < 0.0_dp) then
                        root_print 'No. at HF < 0 - flipping sign of entire ensemble ' &
                                   // 'of particles in simulation: ', part_type
                        root_print AllNoAtHF(part_type)

                        ! And do the flipping
                        call FlipSign(part_type)
                        AllNoatHF(part_type) = -AllNoatHF(part_type)
                        NoatHF(part_type) = -NoatHF(part_type)

                        if (tFillingStochRDMonFly) then
                            ! Want to flip all the averaged signs.
                            AvNoatHF = -AVNoatHF
                            InstNoatHF(part_type) = -InstNoatHF(part_type)
                        end if
                    end if
                end if
            end do
        end if
#endif
        if (iProcIndex == Root) then
            ! Have all of the particles died?
#ifdef CMPLX_
            tRestart = .false.
            do run = 1, inum_runs
                if (near_zero(sum(AllTotParts(min_part_type(run):max_part_type(run))))) then
                    call stop_all(t_r, "All particles have died. Aborting.")
                end if
            end do
#else
            if (near_zero(AllTotParts(1)) .or. near_zero(AllTotParts(inum_runs))) then
                call stop_all(t_r, "All particles have died. Aborting.")
            else
                tRestart = .false.
            end if
            !TODO CMO: Work out how to wipe the walkers on the second population if double run
#endif
        end if
        call MPIBCast(tRestart)
        if (tRestart) then
            ! a restart not wanted in the real-time fciqmc..
            !Initialise variables for calculation on each node
            CALL DeallocFCIMCMemPar()
            IF (iProcIndex == Root) THEN
                close(fcimcstats_unit)
                if (inum_runs == 2) close(fcimcstats_unit2)
                IF (tTruncInitiator) close(initiatorstats_unit)
                IF (tLogComplexPops) close(complexstats_unit)
                if (tLogEXLEVELStats) close(EXLEVELStats_unit)
            end if
            IF (TDebug) close(11)
            CALL SetupParameters()
            CALL InitFCIMCCalcPar()
            if (tFCIMCStats2) then
                call write_fcimcstats2(iter_data_fciqmc, initial=.true.)
                call write_fcimcstats2(iter_data_fciqmc)
            else
                call WriteFciMCStatsHeader()
                ! Prepend a # to the initial status line so analysis doesn't pick up
                ! repetitions in the FCIMCStats or INITIATORStats files from restarts.
                if (iProcIndex == root) then
                    write(fcimcstats_unit, '("#")', advance='no')
                    if (inum_runs == 2) &
                        write(fcimcstats_unit2, '("#")', advance='no')
                    write(initiatorstats_unit, '("#")', advance='no')
                end if
                call WriteFCIMCStats()
            end if
            Iter = 1
            if (iProcIndex == root .and. tLogEXLEVELStats) &
                write(EXLEVELStats_unit, '("#")', advance='no')
            return
        end if

        ! update the number of spawning attempts per walker
        if (tDynamicAvMCEx) then
            if (allNValidExcits /= 0) then
                ! we try to have approx. one valid excitation generated per walker
                AvMCExcits = (allNValidExcits + allNInvalidExcits) / (allNValidExcits)
                write(stdout, *) "Now spawning ", AvMCExcits, " times per walker"
            end if
        end if

    end subroutine iter_diagnostics

    subroutine population_check()

        use HPHFRandExcitMod, only: ReturnAlphaOpenDet

        integer(int32) :: pop_highest(inum_runs), proc_highest(inum_runs)
        real(dp) :: pop_change, old_Hii
        integer :: det(nel), i, error, ierr, run
        integer(int32) :: int_tmp(2)
        logical :: tSwapped, allocate_temp_parts, changed_any
        HElement_t(dp) :: h_tmp
        character(*), parameter :: this_routine = 'population_check'
        character(*), parameter :: t_r = this_routine

        ! If we aren't doing this, then bail out...
        if (.not. tCheckHighestPop) return

        ! If we are accumulating RDMs, then a temporary spawning array is
        ! required of <~ the size of the largest occupied det.
        !
        ! This memory holds walkers spawned from one determinant. This
        ! allows us to test if we are spawning onto the same Dj multiple
        ! times. If only using connections to the HF (tHF_Ref_Explicit)
        ! no stochastic RDM construction is done, and this is not
        ! necessary.
        if (tRDMOnFly .and. .not. tExplicitAllRDM) then

            ! Test if we need to allocate or re-allocate the temporary
            ! spawned parts array
            allocate_temp_parts = .false.
            if (.not. allocated(TempSpawnedParts)) then
                allocate_temp_parts = .true.
                TempSpawnedPartsSize = 1000
            end if
            if (1.5 * maxval(iHighestPop) > TempSpawnedPartsSize) then
                ! This testing routine is only called once every update
                ! cycle. The 1.5 gives us a buffer to cope with particle
                ! growth
                TempSpawnedPartsSize = int(maxval(iHighestPop) * 1.5)
                allocate_temp_parts = .true.
                !write(stdout,*) 1.5 * maxval(iHighestPop), TempSpawnedPartsSize
            end if

            ! If we need to allocate this array, then do so.
            if (allocate_temp_parts) then
                if (allocated(TempSpawnedParts)) then
                    deallocate(TempSpawnedParts)
                    log_dealloc(TempSpawnedPartsTag)
                end if
                allocate(TempSpawnedParts(0:nifd, TempSpawnedPartsSize), &
                          stat=ierr, source=0_n_int)
                call LogMemAlloc('TempSpawnedParts', size(TempSpawnedParts, kind=int64), size_per_element(TempSpawnedParts), &
                                 this_routine, TempSpawnedPartsTag, ierr)
                write (stdout, "(' Allocating temporary array for walkers spawned " &
                           // "from a particular Di.')")
                write(stdout, "(a,f14.6,a)") " This requires ", &
                    real(((nifd + 1) * TempSpawnedPartsSize * size_n_int), dp) &
                    / 1048576.0_dp, " Mb/Processor"
            end if

        end if ! Allocating memory for RDMs

        ! Obtain the determinant (and its processor) with the highest pop
        ! in each of the runs.
        ! n.b. the use of int(iHighestPop) obviously introduces a small amount
        !      of error here, by ignoring the fractional part...
        ! [Werner Dobrautz 15.5.2017:]
        ! maybe this samll error here is the cause of the failed test_suite
        ! runs..
        if (tReplicaReferencesDiffer) then

            do run = 1, inum_runs
                call MPIAllReduceDatatype( &
                    (/int(iHighestPop(run), int32), int(iProcIndex, int32)/), 1, &
                    MPI_MAXLOC, MPI_2INTEGER, int_tmp)
                pop_highest(run) = int_tmp(1)
                proc_highest(run) = int_tmp(2)
            end do
        else

            call MPIAllReduceDatatype( &
                (/int(iHighestPop(1), int32), int(iProcIndex, int32)/), 1, &
                MPI_MAXLOC, MPI_2INTEGER, int_tmp)
            pop_highest = int_tmp(1)
            proc_highest = int_tmp(2)

        end if

        changed_any = .false.
        do run = 1, inum_runs

            ! If using the same reference for all, then we don't consider the
            ! populations seperately...
            if (run /= 1 .and. .not. tReplicaReferencesDiffer) &
                exit

            ! What are the change conditions?
#ifdef CMPLX_
            if (tReplicaReferencesDiffer) then
                pop_change = FracLargerDet * abs_sign(AllNoAtHF(min_part_type(run):max_part_type(run)))
            else
                pop_change = FracLargerDet * abs_sign(AllNoAtHF(1:(lenof_sign / inum_runs)))
            end if
#else
            if (tReplicaReferencesDiffer) then
                pop_change = FracLargerDet * abs(AllNoAtHF(run))
            else
                pop_change = FracLargerDet * abs(AllNoAtHF(1))
            end if
#endif
!            write(stdout,*) "***",AllNoAtHF,FracLargerDet,pop_change, pop_highest,proc_highest
            ! Do we need to do a change?
            ! is this a valid comparison?? we ware comparing a real(dp) pop_change
            ! with a (now) 32 bit integer..
            if (pop_change < real(pop_highest(run), dp) .and. &
                real(pop_highest(run), dp) > pop_change_min) then

                if (tChangeProjEDet) then

                    ! Write out info!
                    changed_any = .true.
                    root_print 'Highest weighted determinant on run', run, &
                        'not reference det: ', pop_highest, abs_sign(AllNoAtHF( &
                                                                     min_part_type(run):max_part_type(run)))

                    !
                    ! Here we are changing the reference det on the fly.
                    ! --> else block for restarting simulation.
                    !

                    ! Communicate the change to all dets and print out.
                    ! [W.D. 15.5.2017:]
                    ! we are typecasting here too..
                    ! we are casting a 32 bit int to a 64 bit ...
                    ! that could cause troubles!
!                     call MPIBcast (HighestPopDet(0:NIfTot, run), NIfTot+1, &
!                                    int(proc_highest(run),n_int))
                    call MPIBcast(HighestPopDet(0:NIfTot, run), NIfTot + 1, &
                                  int(proc_highest(run)))

                    call update_run_reference(HighestPopDet(:, run), run)

                    ! Reset averages
                    SumENum = 0.0_dp
                    sum_proje_denominator = 0.0_dp
                    cyc_proje_denominator = 0.0_dp
                    SumNoatHF = 0.0_dp
                    VaryShiftCycles = 0
                    SumDiagSft = 0.0_dp
                    root_print 'Zeroing all energy estimators.'

                    !Since we have a new reference, we must block only from after this point
                    iBlockingIter = Iter + PreviousCycles

                    ! Reset values introduced in soft_exit (CHANGEVARS)
                    if (tCHeckHighestPopOnce) then
                        tChangeProjEDet = .false.
                        tCheckHighestPop = .false.
                        tCheckHighestPopOnce = .false.
                    end if

                    ! Or are we restarting the calculation with the reference
                    ! det switched?
#ifdef CMPLX_
                else if (tRestartHighPop .and. &
                        iRestartWalkNum < sum(AllTotParts(1:2))) then
#else
                else if (tRestartHighPop .and. &
                        iRestartWalkNum < AllTotParts(1)) then
#endif
                    !
                    ! Here we are restarting the simulation with a new
                    ! reference. See above block for doing it on the fly.
                    !

                    ! Broadcast the changed det to all processors
!                     call MPIBcast (HighestPopDet(:,run), NIfTot+1, &
!                                    int(proc_highest(run),n_int))
                    call MPIBcast(HighestPopDet(:, run), NIfTot + 1, &
                                  int(proc_highest(run)))

                    call update_run_reference(HighestPopDet(:, run), run)

                    ! Only update the global reference energies if they
                    ! correspond to run 1 (which is used for those)
                    if (run == 1) then
                        call ChangeRefDet(ProjEDet(:, 1))
                    end if

                    ! Reset values introduced in soft_exit (CHANGEVARS)
                    if (tCHeckHighestPopOnce) then
                        tChangeProjEDet = .false.
                        tCheckHighestPop = .false.
                        tCheckHighestPopOnce = .false.
                    end if

                end if

            end if
        end do

    end subroutine population_check

    subroutine communicate_estimates(iter_data, tot_parts_new, tot_parts_new_all, t_output)

        ! This routine sums all estimators and stats over all processes.

        ! We want this to be done in as few MPI calls as possible. Therefore, all
        ! quantities are first placed into one of two arrays. There is one array
        ! for HElement_t(dp) estimates, and another array (real(dp)) for all other
        ! kinds and types. A single MPISumAll is then performed for both of these
        ! combined arrays. After communication, the summed results are copied to
        ! the appropriate final arrays, with the type and kind corrected when
        ! necessary.

        ! There are also a few separate MPI calls which reduce using MPI_MAX and
        ! MPI_MIN at the end.

        ! -- *IMPORTANT FOR DEVELOPERS* ---------------------------------------
        ! To add a new quantity to be communicated, you must give it a new entry
        ! in send_arr or send_arr_helem array. It is hopefully clear how to do
        ! this by analogy. You should also update the indices in the appropriate
        ! stop_all, so that it can be checked if enough memory has been assigned.
        use adi_data, only: nCoherentDoubles, nIncoherentDets, nConnection, &
                            AllCoherentDoubles, AllIncoherentDets, AllConnection
        type(fcimc_iter_data) :: iter_data
        real(dp), intent(in) :: tot_parts_new(lenof_sign)
        real(dp), intent(out) :: tot_parts_new_all(lenof_sign)

        ! RT_M_Merge: Added real-time statistics for the newer communication scheme
        integer, parameter :: real_arr_size = 2000
        integer, parameter :: hel_arr_size = 200
        ! RT_M_Merge: Doubled all array sizes since there are now two
        ! copies of most of the variables (necessary?)

        ! Allow room to send up to 1000 (2000 for rt) elements.
        real(dp) :: send_arr(real_arr_size)
        ! Allow room to receive up to 2000 elements.
        real(dp) :: recv_arr(real_arr_size)
        logical, intent(in) :: t_output
        logical :: t_comm_trial
        ! Equivalent arrays for HElement_t variables.
        integer, parameter :: arr_helem_size = 300
        HElement_t(dp) :: send_arr_helem(arr_helem_size)
        HElement_t(dp) :: recv_arr_helem(arr_helem_size)
        ! Equivalent arrays for EXLEVELStats (of exactly required size).
        real(dp) :: send_arr_WNorm(3 * (NEl + 1) * inum_runs), &
                    recv_arr_WNorm(3 * (NEl + 1) * inum_runs)
        ! Allow room for 100 different arrays to be communicated.
        integer, parameter :: size_arr_size = 100
        integer :: sizes(size_arr_size)
        integer :: low, upp, run

        integer(int64) :: TotWalkersTemp
        ! [W.D.12.12.2017]
        ! allow for triples now:
        ! Todo: make that more flexible in the future!
        real(dp) :: bloom_sz_tmp(0:3)
        real(dp) :: RealAllHFCyc(max(lenof_sign, inum_runs))
        real(dp) :: RealAllHFOut(max(lenof_sign, inum_runs))
        real(dp) :: all_norm_semistoch_squared(inum_runs)
        integer :: NoArrs
        character(len=*), parameter :: t_r = 'communicate_estimates'
        integer :: cnt

        ! Remove the holes in the main list when wanting the number of uniquely
        ! occupied determinants.
        TotWalkersTemp = TotWalkers - HolesInList

        ! The trial wavefunction is communicated before output and only if the option is on
        t_comm_trial = t_output .and. tTrialWavefunction

        sizes = 0

        ! low will represent the lower bound of an array slice.
        low = 0
        ! upp will represent the upper bound of an array slice.
        upp = 0

        sizes(1) = size(SpawnFromSing)
        sizes(2) = size(iter_data%update_growth)
        sizes(3) = size(NoBorn)
        sizes(4) = size(NoDied)
        sizes(5) = size(HFCyc)
        sizes(6) = size(NoAtDoubs)
        sizes(7) = size(Annihilated)
        if (tTruncInitiator) then
            sizes(8) = size(NoAddedInitiators)
            sizes(9) = size(NoInitDets)
            sizes(10) = size(NoNonInitDets)
            sizes(11) = size(NoExtraInitDoubs)
            sizes(12) = size(InitRemoved)
            sizes(13) = size(NoAborted)
            sizes(14) = size(NoRemoved)
            sizes(15) = size(NoNonInitWalk)
            sizes(16) = size(NoInitWalk)
        end if
        sizes(17) = 1 ! TotWalkersTemp (single int, not an array)
        sizes(18) = size(norm_psi_squared)
        sizes(19) = size(norm_semistoch_squared)
        sizes(20) = size(TotParts)
        sizes(21) = size(tot_parts_new)
        sizes(22) = size(SumNoAtHF)
        sizes(23) = size(bloom_count)
        sizes(24) = size(NoAtHF)
        sizes(25) = size(SumWalkersCyc)
        sizes(26) = 1 ! nspawned (single int, not an array)

        sizes(27) = 1 ! inst_double_occ
        if (tTruncInitiator) sizes(28) = 1 ! doubleSpawns
        ! communicate the coherence numbers for SI
        sizes(29) = 1

        sizes(30) = 1
        ! Perturbation correction
        sizes(31) = 1

        ! communicate the instant spin diff.. although i am not sure if this
        ! gets too big..
        if (t_spin_measurements) then
            sizes(32) = nBasis / 2
            sizes(33) = nBasis / 2
        end if
        ! truncated weight
        sizes(34) = 1
        ! inits per ex lvl
        sizes(35) = size(initsPerExLvl)
        ! number of successful/invalid excits
        sizes(36) = 1
        sizes(37) = 1

        if (t_measure_local_spin) then
            sizes(38) = nBasis / 2
        end if
        ! en pert space size
        if (tEN2) sizes(39) = 1
        ! Output variable
        if (t_output) then
            sizes(40) = size(HFOut)
            sizes(41) = size(Acceptances)
            sizes(42) = size(SumWalkersOut)
            sizes(43) = 1
        end if
        if (t_real_time_fciqmc) then
            sizes(44) = size(popSnapShot)
            NoArrs = 44
        else
            NoArrs = 43
        end if

        if (tNonInitModShift) then
            sizes(44) = size(NoModShiftWalk)
            sizes(45) = size(NoNonModShiftWalk)
            NoArrs = 45
        else
            NoArrs = 43
        end if

        if (tBosonNoSpawn) then
            sizes(46) = size(NoBosonActiveWalk)
            sizes(47) = size(NoSpawnDets)
            sizes(48) = size(NoSpawnDetsEmpty)
            NoArrs = 48
        else
            NoArrs = 45
        end if

        send_arr = 0.0_dp

        if (sum(sizes(1:NoArrs)) > real_arr_size) call stop_all(t_r, &
             "No space left in arrays for communication of estimates. Please increase " &
             // " the size of the send_arr and recv_arr arrays in the source code.")

        low = upp + 1; upp = low + sizes(1) - 1; send_arr(low:upp) = SpawnFromSing;
        low = upp + 1; upp = low + sizes(2) - 1; send_arr(low:upp) = iter_data%update_growth;
        low = upp + 1; upp = low + sizes(3) - 1; send_arr(low:upp) = NoBorn;
        low = upp + 1; upp = low + sizes(4) - 1; send_arr(low:upp) = NoDied;
        low = upp + 1; upp = low + sizes(5) - 1; send_arr(low:upp) = HFCyc;
        low = upp + 1; upp = low + sizes(6) - 1; send_arr(low:upp) = NoAtDoubs;
        low = upp + 1; upp = low + sizes(7) - 1; send_arr(low:upp) = Annihilated;
        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(8) - 1; send_arr(low:upp) = NoAddedInitiators;
            low = upp + 1; upp = low + sizes(9) - 1; send_arr(low:upp) = NoInitDets;
            low = upp + 1; upp = low + sizes(10) - 1; send_arr(low:upp) = NoNonInitDets;
            low = upp + 1; upp = low + sizes(11) - 1; send_arr(low:upp) = NoExtraInitDoubs;
            low = upp + 1; upp = low + sizes(12) - 1; send_arr(low:upp) = InitRemoved;
            low = upp + 1; upp = low + sizes(13) - 1; send_arr(low:upp) = NoAborted;
            low = upp + 1; upp = low + sizes(14) - 1; send_arr(low:upp) = NoRemoved;
            low = upp + 1; upp = low + sizes(15) - 1; send_arr(low:upp) = NoNonInitWalk;
            low = upp + 1; upp = low + sizes(16) - 1; send_arr(low:upp) = NoInitWalk;
        end if

        low = upp + 1; upp = low + sizes(17) - 1; send_arr(low:upp) = TotWalkersTemp;
        low = upp + 1; upp = low + sizes(18) - 1; send_arr(low:upp) = norm_psi_squared;
        low = upp + 1; upp = low + sizes(19) - 1; send_arr(low:upp) = norm_semistoch_squared;
        low = upp + 1; upp = low + sizes(20) - 1; send_arr(low:upp) = TotParts;
        low = upp + 1; upp = low + sizes(21) - 1; send_arr(low:upp) = tot_parts_new;
        low = upp + 1; upp = low + sizes(22) - 1; send_arr(low:upp) = SumNoAtHf;
        low = upp + 1; upp = low + sizes(23) - 1; send_arr(low:upp) = bloom_count;
        low = upp + 1; upp = low + sizes(24) - 1; send_arr(low:upp) = NoAtHF;
        low = upp + 1; upp = low + sizes(25) - 1; send_arr(low:upp) = SumWalkersCyc;
        low = upp + 1; upp = low + sizes(26) - 1; send_arr(low:upp) = nspawned;
        ! double occ change:
        low = upp + 1; upp = low + sizes(27) - 1; send_arr(low:upp) = inst_double_occ

        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(28) - 1; send_arr(low:upp) = doubleSpawns;
        end if
        low = upp + 1; upp = low + sizes(29) - 1; send_arr(low:upp) = nCoherentDoubles
        low = upp + 1; upp = low + sizes(30) - 1; send_arr(low:upp) = nIncoherentDets
        low = upp + 1; upp = low + sizes(31) - 1; send_arr(low:upp) = nConnection

        if (t_spin_measurements) then
            low = upp + 1; upp = low + sizes(32) - 1; send_arr(low:upp) = inst_spin_diff
            low = upp + 1; upp = low + sizes(33) - 1; send_arr(low:upp) = inst_spatial_doub_occ
        end if

        ! truncated weight
        low = upp + 1; upp = low + sizes(34) - 1; send_arr(low:upp) = truncatedWeight;
        ! initiators per excitation level
        low = upp + 1; upp = low + sizes(35) - 1; send_arr(low:upp) = initsPerExLvl;
        ! excitation number trackers

        low = upp + 1; upp = low + sizes(36) - 1; send_arr(low:upp) = nInvalidExcits;
        low = upp + 1; upp = low + sizes(37) - 1; send_arr(low:upp) = nValidExcits;
        ! local spin
        if (t_measure_local_spin) then
            low = upp + 1; upp = low + sizes(38) - 1; send_arr(low:upp) = inst_local_spin;
        end if

        ! en pert space size
        if (tEN2) then
            low = upp + 1; upp = low + sizes(39) - 1; send_arr(low:upp) = en_pert_main%ndets;
        end if

        if (t_output) then
            low = upp + 1; upp = low + sizes(40) - 1; send_arr(low:upp) = HFOut
            low = upp + 1; upp = low + sizes(41) - 1; send_arr(low:upp) = Acceptances
            low = upp + 1; upp = low + sizes(42) - 1; send_arr(low:upp) = SumWalkersOut
            low = upp + 1; upp = low + sizes(43) - 1; send_arr(low:upp) = n_core_non_init
            if (t_real_time_fciqmc) then
                low = upp + 1; upp = low + sizes(44) - 1; send_arr(low:upp) = popSnapShot;
            end if
        end if

        if (tNonInitModShift) then
            low = upp + 1; upp = low + sizes(44) - 1; send_arr(low:upp) = NoModShiftWalk;
            low = upp + 1; upp = low + sizes(45) - 1; send_arr(low:upp) = NoNonModShiftWalk;
        end if

        if (tBosonNoSpawn) then
            low = upp + 1; upp = low + sizes(46) - 1; send_arr(low:upp) = NoBosonActiveWalk;
            low = upp + 1; upp = low + sizes(47) - 1; send_arr(low:upp) = NoSpawnDets;
            low = upp + 1; upp = low + sizes(48) - 1; send_arr(low:upp) = NoSpawnDetsEmpty;
        end if

        ! Perform the communication.
        call MPISumAll(send_arr(1:upp), recv_arr(1:upp))

        ! Now we just need each result to be extracted to the correct array, with
        ! the correct type.

        low = 0; upp = 0

        low = upp + 1; upp = low + sizes(1) - 1; AllSpawnFromSing = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(2) - 1; iter_data%update_growth_tot = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(3) - 1; AllNoBorn = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(4) - 1; AllNoDied = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(5) - 1; RealAllHFCyc = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(6) - 1; AllNoAtDoubs = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(7) - 1; AllAnnihilated = recv_arr(low:upp);
        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(8) - 1; AllNoAddedInitiators = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(9) - 1; AllNoInitDets = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(10) - 1; AllNoNonInitDets = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(11) - 1; AllNoExtraInitDoubs = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(12) - 1; AllInitRemoved = nint(recv_arr(low:upp), int64);
            low = upp + 1; upp = low + sizes(13) - 1; AllNoAborted = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(14) - 1; AllNoRemoved = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(15) - 1; AllNoNonInitWalk = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(16) - 1; AllNoInitWalk = recv_arr(low:upp);
        end if
        low = upp + 1; upp = low + sizes(17) - 1; AllTotWalkers = nint(recv_arr(low), int64);
        low = upp + 1; upp = low + sizes(18) - 1; all_norm_psi_squared = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(19) - 1; all_norm_semistoch_squared = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(20) - 1; AllTotParts = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(21) - 1; tot_parts_new_all = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(22) - 1; AllSumNoAtHF = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(23) - 1; all_bloom_count = nint(recv_arr(low:upp));
        low = upp + 1; upp = low + sizes(24) - 1; AllNoAtHf = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(25) - 1; AllSumWalkersCyc = recv_arr(low:upp);
        low = upp + 1; upp = low + sizes(26) - 1; nspawned_tot = nint(recv_arr(low), int64);
        ! double occ:
        low = upp + 1; upp = low + sizes(27) - 1; all_inst_double_occ = recv_arr(low);
        if (tTruncInitiator) then
            low = upp + 1; upp = low + sizes(28) - 1; allDoubleSpawns = nint(recv_arr(low));
            doubleSpawns = 0
        end if
        low = upp + 1; upp = low + sizes(29) - 1; AllCoherentDoubles = nint(recv_arr(low));
        low = upp + 1; upp = low + sizes(30) - 1; AllIncoherentDets = nint(recv_arr(low));
        low = upp + 1; upp = low + sizes(31) - 1; AllConnection = nint(recv_arr(low));
        if (t_spin_measurements) then
            low = upp + 1; upp = low + sizes(32) - 1; all_inst_spin_diff = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(33) - 1; all_inst_spatial_doub_occ = recv_arr(low:upp)
        end if

        ! truncated weight
        low = upp + 1; upp = low + sizes(34) - 1; AllTruncatedWeight = recv_arr(low);
        ! initiators per excitation level
        low = upp + 1; upp = low + sizes(35) - 1; AllInitsPerExLvl = nint(recv_arr(low:upp));
        ! excitation number trackers
        low = upp + 1; upp = low + sizes(36) - 1; allNInvalidExcits = nint(recv_arr(low), int64);
        low = upp + 1; upp = low + sizes(37) - 1; allNValidExcits = nint(recv_arr(low), int64);

        ! local spin
        if (t_measure_local_spin) then
            low = upp + 1; upp = low + sizes(38) - 1; all_local_spin = recv_arr(low:upp);
        end if

        ! en_pert space size
        if (tEN2) then
            low = upp + 1; upp = low + sizes(39) - 1; en_pert_main%ndets_all = nint(recv_arr(low));
        end if
        ! Output variables
        if (t_output) then
            low = upp + 1; upp = low + sizes(40) - 1; RealAllHFOut = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(41) - 1; AllAcceptances = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(42) - 1; AllSumWalkersOut = recv_arr(low:upp)
            low = upp + 1; upp = low + sizes(43) - 1; all_n_core_non_init = nint(recv_arr(low))
            if (t_real_time_fciqmc) then
                low = upp + 1; upp = low + sizes(44) - 1; allPopSnapShot = recv_arr(low:upp);
            end if
        end if

        if (tNonInitModShift) then
            low = upp + 1; upp = low + sizes(44) - 1; AllNoModShiftWalk = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(45) - 1; AllNoNonModShiftWalk = recv_arr(low:upp);
        end if

        if (tBosonNoSpawn) then
            low = upp + 1; upp = low + sizes(46) - 1; AllNoBosonActiveWalk = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(47) - 1; AllNoSpawnDets = recv_arr(low:upp);
            low = upp + 1; upp = low + sizes(48) - 1; AllNoSpawnDetsEmpty = recv_arr(low:upp);
        end if

        ! Communicate HElement_t variables:

        low = 0; upp = 0;
        sizes(1) = size(ENumCyc)
        sizes(2) = size(SumENum)
        sizes(3) = size(ENumCycAbs)
        sizes(4) = size(cyc_proje_denominator)
        sizes(5) = size(sum_proje_denominator)
        if (t_comm_trial) then
            sizes(6) = size(trial_numerator)
            sizes(7) = size(trial_denom)
            sizes(8) = size(trial_num_inst)
            sizes(9) = size(trial_denom_inst)
            sizes(10) = size(init_trial_numerator)
            sizes(11) = size(init_trial_denom)
        end if
        sizes(12) = size(InitsEnumCyc)
        sizes(13) = size(ENumOut)

        if (sum(sizes(1:14)) > arr_helem_size) then
            call stop_all(t_r, "No space left in arrays for communication of estimates. " &
                    // "Please increase the size of the send_arr_helem " &
                    // "and recv_arr_helem arrays in the source code.")
        end if

        low = upp + 1; upp = low + sizes(1) - 1; send_arr_helem(low:upp) = ENumCyc;
        low = upp + 1; upp = low + sizes(2) - 1; send_arr_helem(low:upp) = SumENum;
        low = upp + 1; upp = low + sizes(3) - 1; send_arr_helem(low:upp) = ENumCycAbs;
        low = upp + 1; upp = low + sizes(4) - 1; send_arr_helem(low:upp) = cyc_proje_denominator;
        low = upp + 1; upp = low + sizes(5) - 1; send_arr_helem(low:upp) = sum_proje_denominator;
        if (t_comm_trial) then
            low = upp + 1; upp = low + sizes(6) - 1; send_arr_helem(low:upp) = trial_numerator;
            low = upp + 1; upp = low + sizes(7) - 1; send_arr_helem(low:upp) = trial_denom;
            low = upp + 1; upp = low + sizes(8) - 1; send_arr_helem(low:upp) = trial_num_inst;
            low = upp + 1; upp = low + sizes(9) - 1; send_arr_helem(low:upp) = trial_denom_inst;
            low = upp + 1; upp = low + sizes(10) - 1; send_arr_helem(low:upp) = init_trial_numerator;
            low = upp + 1; upp = low + sizes(11) - 1; send_arr_helem(low:upp) = init_trial_denom;
        end if
        low = upp + 1; upp = low + sizes(12) - 1; send_arr_helem(low:upp) = InitsENumCyc;
        if (t_output) then
            low = upp + 1; upp = low + sizes(13) - 1; send_arr_helem(low:upp) = ENumOut;
        end if

        call MPISumAll(send_arr_helem(1:upp), recv_arr_helem(1:upp))

        low = 0; upp = 0;
        low = upp + 1; upp = low + sizes(1) - 1; AllENumCyc = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(2) - 1; AllSumENum = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(3) - 1; AllENumCycAbs = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(4) - 1; all_cyc_proje_denominator = recv_arr_helem(low:upp);
        low = upp + 1; upp = low + sizes(5) - 1; all_sum_proje_denominator = recv_arr_helem(low:upp);
        if (t_comm_trial) then
            low = upp + 1; upp = low + sizes(6) - 1; tot_trial_numerator = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(7) - 1; tot_trial_denom = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(8) - 1; tot_trial_num_inst = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(9) - 1; tot_trial_denom_inst = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(10) - 1; tot_init_trial_numerator = recv_arr_helem(low:upp);
            low = upp + 1; upp = low + sizes(11) - 1; tot_init_trial_denom = recv_arr_helem(low:upp);
        end if
        low = upp + 1; upp = low + sizes(12) - 1; AllInitsENumCyc = recv_arr_helem(low:upp);
        if (t_output) then
            low = upp + 1; upp = low + sizes(13) - 1; AllEnumOut = recv_arr_helem(low:upp);
        end if

        ! Optionally communicate EXLEVEL_WNorm.
        if (tLogEXLEVELStats) then
            upp = size(EXLEVEL_WNorm)
            send_arr_WNorm(1:upp) = reshape(EXLEVEL_WNorm, (/upp/))
            call MPISumAll(send_arr_WNorm(1:upp), recv_arr_WNorm(1:upp))
            AllEXLEVEL_WNorm = reshape(recv_arr_WNorm(1:upp), &
                                       shape(AllEXLEVEL_WNorm))
            ! Apply square root for L2 norm.
            AllEXLEVEL_WNorm(2, :, :) = sqrt(AllEXLEVEL_WNorm(2, :, :))
        end if

        ! Do some processing of the received data.

        ! Convert real array into HElement_t one.
        do run = 1, inum_runs
            AllHFCyc(run) = ARR_RE_OR_CPLX(RealAllHFCyc, run)
            AllHFOut(run) = ARR_RE_OR_CPLX(RealAllHFOut, run)
        end do

#ifdef CMPLX_
        norm_psi = sqrt(sum(all_norm_psi_squared))
        norm_semistoch = sqrt(sum(all_norm_semistoch_squared))
#else
        norm_psi = sqrt(all_norm_psi_squared)
        norm_semistoch = sqrt(all_norm_semistoch_squared)
#endif

        ! These require a different type of reduce operation, so are communicated
        ! separately to the above communication.
        call MPIAllReduce(bloom_sizes(1:2), MPI_MAX, bloom_sz_tmp(1:2))
        bloom_sizes(1:2) = bloom_sz_tmp(1:2)

        ! Arrays for checking load balancing.
        call MPIReduce(TotWalkersTemp, MPI_MAX, MaxWalkersProc)
        call MPIReduce(TotWalkersTemp, MPI_MIN, MinWalkersProc)
        call MPIReduce(max_cyc_spawn, MPI_MAX, all_max_cyc_spawn)

    end subroutine communicate_estimates

    subroutine collate_iter_data(iter_data)

        type(fcimc_iter_data) :: iter_data
        logical :: ltmp
        character(len=*), parameter :: this_routine = 'collate_iter_data'

        ! We should update tau searching if it is enabled, or if it has been
        ! enabled, and now tau is outside the range acceptable for tau
        ! searching
        if (t_scale_tau_to_death .and. tau_search_method == possible_tau_search_methods%OFF) then
            call MPIAllLORLogical(scale_tau_to_death_triggered, ltmp)
            scale_tau_to_death_triggered = ltmp
        end if

        ! for now with the new tau-search also update tau in variable shift
        ! mode..
        if (.not. tFillingStochRDMOnFly) then
            if (tau_search_method == possible_tau_search_methods%CONVENTIONAL) then
                call update_tau()
            else if (tau_search_method == possible_tau_search_methods%HISTOGRAMMING) then
                call update_tau_hist()
            else if (scale_tau_to_death_triggered) then
                ASSERT(tau_search_method == possible_tau_search_methods%OFF)
                ASSERT(t_scale_tau_to_death)
                call scale_tau_to_death()
            end if
        end if

        ! quick fix for the double occupancy:
        if (t_calc_double_occ_av) then
            ! sum up the squared norm after shift has set in TODO
            ! and use the mean value if multiple runs are used
            ! still thinking about if i only want to calc it after
            ! equilibration
            sum_norm_psi_squared = sum_norm_psi_squared + &
                                   sum(all_norm_psi_squared) / real(inum_runs, dp)

            ! and also sum up the double occupancy:
            sum_double_occ = sum_double_occ + all_inst_double_occ
        end if

#ifdef DEBUG_
        if (.not. tfirst_cycle) then
            ! realtime case is handled seperately with the check_update_growth function
            ! as each RK step has to be monitored separately

            ! Write this 'ASSERTROOT' out explicitly to avoid line lengths problems
            if ((iProcIndex == root) .and. .not. tSpinProject .and. .not. tTrialShift .and. &
                all(abs(iter_data%update_growth_tot - (AllTotParts - AllTotPartsOld)) > 1.0e-5)) then
                write(stderr, *) "update_growth: ", iter_data%update_growth_tot
                write(stderr, *) "AllTotParts: ", AllTotParts
                write(stderr, *) "AllTotPartsOld: ", AllTotPartsOld
                call stop_all(this_routine, &
                              "Assertation failed: all(iter_data%update_growth_tot.eq.AllTotParts-AllTotPartsOld)")
            end if
        end if
#endif

    end subroutine collate_iter_data

    function relative_trial_numerator(tt_numerator, tt_denom, replica_pairs) &
        result(rel_tot_trial_numerator)
        HElement_t(dp), intent(in) :: tt_numerator(inum_runs), tt_denom(inum_runs)
        logical, intent(in) :: replica_pairs
        HElement_t(dp) :: rel_tot_trial_numerator(inum_runs)
        integer :: run

        if (.not. qmc_trial_wf) then
            ! Becuase tot_trial_numerator/tot_trial_denom is the energy
            ! relative to the the trial energy, add on this contribution to
            ! make it relative to the HF energy.
            if (ntrial_excits == 1) then
                rel_tot_trial_numerator = tt_numerator + (tt_denom * trial_energies(1))
            else
                if (replica_pairs) then
                    do run = 2, inum_runs, 2
                        rel_tot_trial_numerator(run - 1:run) = tt_numerator(run - 1:run) + &
                                                               tt_denom(run - 1:run) * trial_energies(run / 2)
                    end do
                else
                    rel_tot_trial_numerator = tt_numerator + (tt_denom * trial_energies)
                end if
            end if
        else
            rel_tot_trial_numerator = tt_numerator
        end if

    end function relative_trial_numerator

    subroutine update_shift(iter_data, replica_pairs)

        use CalcData, only: tInstGrowthRate, tL2GrowRate

        type(fcimc_iter_data), intent(in) :: iter_data
        logical, intent(in) :: replica_pairs
        integer(int64) :: tot_walkers
        logical, dimension(inum_runs) :: tReZeroShift
        real(dp), dimension(inum_runs) :: AllGrowRateRe, AllGrowRateIm
        real(dp), dimension(inum_runs)  :: AllHFGrowRate, AllWalkers
        real(dp), dimension(lenof_sign) :: denominator, all_denominator
        real(dp), dimension(inum_runs) :: rel_tot_trial_numerator
        integer :: error, i, proc, pos, run, lb, ub
        logical, dimension(inum_runs) :: defer_update
        logical :: start_varying_shift
        character(*), parameter :: this_routine = 'update_shift'

        ! Normally we allow the shift to vary depending on the conditions
        ! tested. Sometimes we want to defer this to the next cycle...
        defer_update(:) = .false.

        ! collate_iter_data --> The values used are only valid on Root
        i_am_root : if (iProcIndex == Root) then

            if (tL2GrowRate) then
                ! use the L2 norm to determine the growrate
                AllGrowRate(:) = norm_psi(:) / old_norm_psi(:)
                AllWalkers(:) = norm_psi(:)

            else if (tInstGrowthRate) then

                ! Calculate the growth rate simply using the two points at
                ! the beginning and the end of the update cycle.
                do run = 1, inum_runs
                    lb = min_part_type(run)
                    ub = max_part_type(run)
                    AllGrowRate(run) = (sum(iter_data%update_growth_tot(lb:ub) &
                                            + iter_data%tot_parts_old(lb:ub))) &
                                       / real(sum(iter_data%tot_parts_old(lb:ub)), dp)
                    AllWalkers(run) = (sum(iter_data%update_growth_tot(lb:ub) &
                                            + iter_data%tot_parts_old(lb:ub)))
                end do

            else

                ! Instead attempt to calculate the average growth over every
                ! iteration over the update cycle
                if (all(.not. near_zero(OldAllAvWalkersCyc))) then
                    AllGrowRate(:) = AllSumWalkersCyc(:) / real(StepsSft, dp) &
                                       / OldAllAvWalkersCyc(:)
                end if
                AllWalkers(:) = AllSumWalkersCyc(:) / real(StepsSft, dp)

            end if
            ! For complex case, obtain both Re and Im parts
#ifdef CMPLX_
            do run = 1, inum_runs
                lb = min_part_type(run)
                ub = max_part_type(run)
                if (.not. near_zero(iter_data%tot_parts_old(lb))) then
                    AllGrowRateRe(run) = &
                        (iter_data%update_growth_tot(lb) + iter_data%tot_parts_old(lb)) &
                        / iter_data%tot_parts_old(lb)
                end if
                if (.not. near_zero(iter_data%tot_parts_old(ub))) then
                    AllGrowRateIm(run) = &
                        (iter_data%update_growth_tot(ub) + iter_data%tot_parts_old(ub)) &
                        / iter_data%tot_parts_old(ub)
                end if
            end do
#endif
            ! If any run uses the fixtrial option, we need to add the offset to the
            ! trial numerator
            if (tTrialWavefunction .and. tTrialShift) &
                rel_tot_trial_numerator = real(relative_trial_numerator( &
                          tot_trial_numerator, tot_trial_denom, replica_pairs), dp)

            ! Exit the single particle phase if the number of walkers exceeds
            ! the value in the input file. If particle no has fallen, re-enter
            ! it.
            tReZeroShift = .false.
            do run = 1, inum_runs
                lb = min_part_type(run)
                ub = max_part_type(run)

                if (tTrialShift .and. .not. tFixTrial(run) .and. tTrialWavefunction .and. abs(tot_trial_denom(run)) >= TrialTarget) then
                    !When reaching target overlap with trial wavefunction, set flag to keep it fixed.
                    tFixTrial(run) = .True.

                    write(stdout, '(a,i13,a,i1)') 'Exiting the varaible shift phase on iteration: ' &
                        , iter + PreviousCycles, ' - overlap with trial wavefunction of the following run is now fixed: ', run
                end if

                if (tFixedN0) then
                    if (tModShiftTarget) tModShiftPhase = .true.

                    if (.not. tSkipRef(run) .and. abs(AllHFCyc(run)) >= N0_Target) then
                        !When reaching target N0, set flag to keep the population of reference det fixed.
                        tSkipRef(run) = .True.

                        write(stdout, '(a,i13,a,i1)') 'Exiting the fixed shift phase on iteration: ' &
                            , iter + PreviousCycles, ' - reference population of the following run is now fixed: ', run
                        !Set these parameters because other parts of the code depends on them
                        VaryShiftIter(run) = Iter
                        iBlockingIter(run) = Iter + PreviousCycles
                        tSinglePartPhase(run) = .false.
                    end if

                    if (tSkipRef(run)) then
                        !Use the projected energy as the shift to fix the
                        !population of the reference det and thus reduce the
                        !fluctuations of the projected energy.

                        !ToDo: Make DiagSft complex
                        DiagSft(run) = real((AllENumCyc(run)) &
                            / (AllHFCyc(run)) + proje_ref_energy_offsets(run), dp)
                        if (tModShiftTarget) then
                            tot_walkers = int(InitWalkers, int64) * int(nNodes, int64)
                            TargetWalkers2 = tot_walkers - AllNoModShiftWalk(run)
                            if (TargetWalkers2 < 1.0_dp) TargetWalkers2 = 1.0_dp
                            if (iter < (ModShiftTargetIter+ModShiftConstIters)) then
                                DiagSft2(run) = DiagSft2Init
                            else
                                DiagSft2(run) = &
                                    DiagSft2(run) &
                                    - (log(AllNoNonModShiftWalk(run)/OldAllNoNonModShiftWalk(run)) * SftDamp &
                                            + log(AllNoNonModShiftWalk(run)/TargetWalkers2) * SftDamp2) &
                                        / (Tau * StepsSft)
                            end if
                        end if

                        ! Update the shift averages
                        if ((iter - VaryShiftIter(run)) >= nShiftEquilSteps) then
                            if ((iter - VaryShiftIter(run) - nShiftEquilSteps) < StepsSft) &
                                write(stdout, '(a,i14)') 'Beginning to average shift value on iteration: ', iter + PreviousCycles
                            VaryShiftCycles(run) = VaryShiftCycles(run) + 1
                            SumDiagSft(run) = SumDiagSft(run) + DiagSft(run)
                            AvDiagSft(run) = SumDiagSft(run) / real(VaryShiftCycles(run), dp)
                        end if
                    else
                        !Keep shift equal to input till target reference population is reached.
                        DiagSft(run) = InputDiagSft(run)
                        if (tModShiftTarget) DiagSft2(run) = DiagSft2Init
                    end if

                else if (tFixTrial(run)) then
                    !Use the trial energy as the shift to fix the
                    !overlap with trial wavefunction and thus reduce the
                    !fluctuations of the trial energy.

                    !ToDo: Make DiafSft complex
                    DiagSft(run) = real((rel_tot_trial_numerator(run) / tot_trial_denom(run)) - Hii, dp)

                    ! Update the shift averages
                    if ((iter - VaryShiftIter(run)) >= nShiftEquilSteps) then
                        if ((iter - VaryShiftIter(run) - nShiftEquilSteps) < StepsSft) &
                            write(stdout, '(a,i14)') 'Beginning to average shift value on iteration: ', iter + PreviousCycles
                        VaryShiftCycles(run) = VaryShiftCycles(run) + 1
                        SumDiagSft(run) = SumDiagSft(run) + DiagSft(run)
                        AvDiagSft(run) = SumDiagSft(run) / real(VaryShiftCycles(run), dp)
                    end if

                else if(.not. tRenorm) then!not Fixed-N0 and not Trial-Shift
                    tot_walkers = int(InitWalkers, int64) * int(nNodes, int64)
                    single_part_phase : if (TSinglePartPhase(run)) then

#ifdef CMPLX_
                        if ((sum(AllTotParts(lb:ub)) > tot_walkers) .or. &
                            (abs_sign(AllNoatHF(lb:ub)) > MaxNoatHF)) then
                            write(stdout, '(a,i13,a)') 'Exiting the single particle growth phase on iteration: ', iter + PreviousCycles, &
                                ' - Shift can now change'
                            VaryShiftIter(run) = Iter
                            iBlockingIter(run) = Iter + PreviousCycles
                            tSinglePartPhase(run) = .false.
                            if (abs(TargetGrowRate(run)) > EPS) then
                                write(stdout, "(A)") "Setting target growth rate to 1."
                                TargetGrowRate = 0.0_dp
                            end if

                            ! If enabled, jump the shift to the value preducted by the
                            ! projected energy!
                            if (tJumpShift) then
                                if (tJumpShift .and. &
                                    (.not. (isnan(real(proje_iter(run), dp))) .or. &
                                     .not. (is_inf(real(proje_iter(run), dp))))) then
                                    DiagSft(run) = real(proje_iter(run), dp)
                                    defer_update(run) = .true.
                                end if
                            end if
                        end if
#else
                        start_varying_shift = .false.
                        if (tLetInitialPopDie) then
                            if (AllTotParts(run) < tot_walkers) start_varying_shift = .true.
                        else if (tTargetShiftdamp .or. tBosonNoSpawn) then
                            start_varying_shift = .true.
                        else
                            if ((AllTotParts(run) > tot_walkers) .or. &
                                (abs(AllNoatHF(run)) > MaxNoatHF)) start_varying_shift = .true.
                        end if

                        if (start_varying_shift) then
                            write(stdout, '(a,i13,a,i1)') 'Exiting the single particle growth phase on iteration: ' &
                                , iter + PreviousCycles, ' - Shift can now change for population', run
                            VaryShiftIter(run) = Iter
                            iBlockingIter(run) = Iter + PreviousCycles
                            tSinglePartPhase(run) = .false.
                            ! [W.D. 15.5.2017]
                            ! change equal 0 comps
                            if (abs(TargetGrowRate(run)) > EPS) then
                                write(stdout, "(A)") "Setting target growth rate to 1."
                                TargetGrowRate(run) = 0.0_dp
                            end if

                            ! If enabled, jump the shift to the value preducted by the
                            ! projected energy!
                            if (tJumpShift) then
                                DiagSft(run) = real(proje_iter(run), dp)
                                defer_update(run) = .true.
                            end if

                            if (tNonInitModShift .and. .not. tModShiftTarget) then
                                tModShiftPhase = .true.
                            end if
                        end if
#endif
                    else ! .not.tSinglePartPhase(run)

#ifdef CMPLX_
                        if (abs_sign(AllNoatHF(lb:ub)) < MaxNoatHF - HFPopThresh) then
#else
                        if (abs(AllNoatHF(run)) < MaxNoatHF - HFPopThresh) then
#endif
                            write(stdout, '(a,i13,a)') &
                                'No at HF has fallen too low - reentering the single particle growth phase on iteration', &
                                iter + PreviousCycles, ' - particle number may grow again.'
                            tSinglePartPhase(run) = .true.
                            tReZeroShift(run) = .true.
                        end if

                    end if single_part_phase

                    ! How should the shift change for the entire ensemble of walkers
                    ! over all processors.

                    if (tModShiftTarget .and. iter > ModShiftTargetIter) then
                        tModShiftPhase = .true.
                    end if

                    if (.not. tRenorm .and. .not. (tSinglePartPhase(run) &
                               .and. near_zero(TargetGrowRate(run)) &
                               .or. defer_update(run))) then

                        !In case we want to continue growing, TargetGrowRate > 0.0_dp
                        ! New shift value
                        !                     if(TargetGrowRate(run).ne.0.0_dp) then
                        ! [W.D. 15.5.2017]
                        if (abs(TargetGrowRate(run)) > EPS) then
#ifdef CMPLX_
                            if (sum(AllTotParts(lb:ub)) > TargetGrowRateWalk(run)) then
#else
                            if (AllTotParts(run) > TargetGrowRateWalk(run)) then
#endif
                                if (tTargetShiftdamp) then
                                    call stop_all(this_routine, &
                                        "Target-shiftdamp not compatible with targetgrowrate!")
                                end if
                                !Only allow targetgrowrate to kick in once we have > TargetGrowRateWalk walkers.
                                DiagSft(run) = DiagSft(run) - (log(AllGrowRate(run) - TargetGrowRate(run)) * SftDamp) / &
                                               (Tau * StepsSft)
                                ! Same for the info shifts for complex walkers
#ifdef CMPLX_
                                DiagSftRe(run) = DiagSftRe(run) - (log(AllGrowRateRe(run) - TargetGrowRate(run)) * SftDamp) / &
                                                     (Tau * StepsSft)
                                DiagSftIm(run) = DiagSftIm(run) - (log(AllGrowRateIm(run) - TargetGrowRate(run)) * SftDamp) / &
                                                     (Tau * StepsSft)
#endif
                            end if
                        else
                            if (tShiftonHFPop) then
                                !Calculate the shift required to keep the HF population constant

                                AllHFGrowRate(run) = abs(AllHFCyc(run) / real(StepsSft, dp)) / abs(OldAllHFCyc(run))
                                DiagSft(run) = DiagSft(run) - (log(AllHFGrowRate(run)) * SftDamp) / &
                                               (Tau * StepsSft)
                            else if (tInitShift) then
                                DiagSft(run) = DiagSft(run) - (log(AllNoInitWalk(run)/OldAllNoInitWalk(run)) * SftDamp) / &
                                               (Tau * StepsSft)
                            else if (tModShiftTarget) then
                                DiagSft(run) = DiagSft(run) - (log(AllNoModShiftWalk(run)/OldAllNoModShiftWalk(run)) * SftDamp + &
                                                   log(AllNoModShiftWalk(run)/TargetWalkers1) * SftDamp2) / &
                                                   (Tau * StepsSft)
                                if (iter < (modShiftTargetIter + modShiftConstIters)) then
                                    DiagSft2(run) = DiagSft2Init
                                else
                                    DiagSft2(run) = DiagSft2(run) - (log(AllNoNonModShiftWalk(run)/OldAllNoNonModShiftWalk(run)) * SftDamp + &
                                                        log(AllNoNonModShiftWalk(run)/TargetWalkers2) * SftDamp2) / &
                                                        (Tau * StepsSft)
                                end if
                            else if (tBosonNoSpawn) then
                                DiagSft(run) = DiagSft(run) - (log(AllNoBosonActiveWalk(run)/OldAllNoBosonActiveWalk(run)) * SftDamp + &
                                                   log(AllNoBosonActiveWalk(run)/tot_walkers) * SftDamp2) / &
                                                   (Tau * StepsSft)
                            else if (tTargetShiftdamp) then
                               if (.not. near_zero(AllGrowRate(run)) .and. .not. near_zero(AllWalkers(run))) then
                                    DiagSft(run) = DiagSft(run) - (log(AllGrowRate(run)) * SftDamp + &
                                                   log(AllWalkers(run)/tot_walkers) * SftDamp2) / &
                                                   (Tau * StepsSft)
                                else
                                    call stop_all(this_routine, "Shift undefined because walker growth rate is zero. Aborting.")
                                end if
                            else
                                if (.not. near_zero(AllGrowRate(run))) then
                                    DiagSft(run) = DiagSft(run) - (log(AllGrowRate(run)) * SftDamp) / &
                                                   (Tau * StepsSft)
                                else
                                    call stop_all(this_routine, "Shift undefined because walker growth rate is zero. Aborting.")
                                end if
                            end if
                        end if

                        ! Update the shift averages
                        if ((iter - VaryShiftIter(run)) >= nShiftEquilSteps) then
                            if ((iter - VaryShiftIter(run) - nShiftEquilSteps) < StepsSft) &
                                write(stdout, '(a,i14)') 'Beginning to average shift value on iteration: ', iter + PreviousCycles
                            VaryShiftCycles(run) = VaryShiftCycles(run) + 1
                            SumDiagSft(run) = SumDiagSft(run) + DiagSft(run)
                            AvDiagSft(run) = SumDiagSft(run) / real(VaryShiftCycles(run), dp)
                        end if

                    end if
                end if !tFixedN0 or not
                ! only update the shift this way if possible
                if (abs_sign(AllNoatHF(lb:ub)) > EPS) then
#ifdef CMPLX_
                    ! Calculate the instantaneous 'shift' from the HF population
                    HFShift(run) = -1.0_dp / abs_sign(AllNoatHF(lb:ub)) * &
                                   (abs_sign(AllNoatHF(lb:ub)) - abs_sign(OldAllNoatHF(lb:ub)) / &
                                    (Tau * real(StepsSft, dp)))
                    InstShift(run) = -1.0_dp / sum(AllTotParts(lb:ub)) * &
                                     ((sum(AllTotParts(lb:ub)) - sum(AllTotPartsOld(lb:ub))) / &
                                      (Tau * real(StepsSft, dp)))
#else
                    ! Calculate the instantaneous 'shift' from the HF population
                    HFShift(run) = -1.0_dp / abs(AllNoatHF(run)) * &
                                   (abs(AllNoatHF(run)) - abs(OldAllNoatHF(run)) / &
                                    (Tau * real(StepsSft, dp)))
                    InstShift(run) = -1.0_dp / AllTotParts(run) * &
                                     ((AllTotParts(run) - AllTotPartsOld(run)) / &
                                      (Tau * real(StepsSft, dp)))
#endif
                end if

                ! When using a linear combination, the denominator is summed
                ! directly.
                all_sum_proje_denominator(run) = ARR_RE_OR_CPLX(AllSumNoatHF, run)
                all_cyc_proje_denominator(run) = AllHFCyc(run)

                ! Calculate the projected energy.

                if (.not. near_zero(AllSumNoatHF(run))) then
                    ProjectionE(run) = (AllSumENum(run)) / (all_sum_proje_denominator(run)) &
                                       + proje_ref_energy_offsets(run)
                end if
                if (abs(AllHFCyc(run)) > EPS) then
                    proje_iter(run) = (AllENumCyc(run)) / (all_cyc_proje_denominator(run)) &
                                      + proje_ref_energy_offsets(run)
                    AbsProjE(run) = (AllENumCycAbs(run)) / (all_cyc_proje_denominator(run)) &
                                    + proje_ref_energy_offsets(run)
                    inits_proje_iter(run) = (AllInitsENumCyc(run)) / (all_cyc_proje_denominator(run)) &
                                            + proje_ref_energy_offsets(run)
                end if
                ! If we are re-zeroing the shift
                if (tReZeroShift(run)) then
                    DiagSft(run) = 0.0_dp
                    DiagSft2(run) = 0.0_dp
                    VaryShiftCycles(run) = 0
                    SumDiagSft(run) = 0.0_dp
                    AvDiagSft(run) = 0.0_dp
                end if
            end do

            ! Get some totalled values
            if (abs(sum(all_sum_proje_denominator(1:inum_runs))) > EPS) then
                projectionE_tot = sum(AllSumENum(1:inum_runs)) &
                                  / sum(all_sum_proje_denominator(1:inum_runs))
            end if
            if (abs(sum(all_cyc_proje_denominator(1:inum_runs))) > EPS) then
                proje_iter_tot = sum(AllENumCyc(1:inum_runs)) &
                                 / sum(all_cyc_proje_denominator(1:inum_runs))
                inits_proje_iter_tot = sum(AllInitsENumCyc(1:inum_runs)) &
                                       / sum(all_cyc_proje_denominator(1:inum_runs))
            end if

        end if i_am_root

        ! Broadcast the shift from root to all the other processors
        call MPIBcast(tSinglePartPhase)
        call MPIBCast(tModShiftPhase)
        call MPIBcast(VaryShiftIter)
        call MPIBcast(DiagSft)
        call MPIBcast(DiagSft2)
        call MPIBcast(tSkipRef)
        call MPIBcast(tFixTrial)
        call MPIBcast(VaryShiftCycles)
        call MPIBcast(SumDiagSft)
        call MPIBcast(AvDiagSft)

        do run = 1, inum_runs
            if (.not. tSinglePartPhase(run)) then
                TargetGrowRate(run) = 0.0_dp
            end if
        end do

        if (tau_search_method /= possible_tau_search_methods%off) then
            if (end_of_search_reached(tau_search_method, tau_stop_method)) then
                call stop_tau_search(tau_stop_method)
            end if
        end if
    end subroutine update_shift

    subroutine rezero_output_stats()
        ! Zero all accumulated variables that are only used in output
        IterTime = 0.0_sp
        Acceptances = 0.0_dp
        NoBorn = 0.0_dp
        NoDied = 0.0_dp
        Annihilated = 0.0_dp
        max_cyc_spawn = 0.0_dp
        trial_numerator = 0.0_dp
        trial_denom = 0.0_dp

        ! These are dedicated output variables
        ENumOut = 0.0_dp
        HFOut = 0.0_dp
        AllTotPartsLastOutput = AllTotParts
        SumWalkersOut = 0.0_dp

        ! reset the truncated weight
        truncatedWeight = 0.0_dp

        ! reset the logged number of initiators
        initsPerExLvl = 0

        ! and the number of excits
        nInvalidExcits = 0
        nValidExcits = 0

        ! the number of non-inititators in the core-space
        n_core_non_init = 0
    end subroutine rezero_output_stats

    subroutine rezero_iter_stats_update_cycle(iter_data, tot_parts_new_all)

        type(fcimc_iter_data), intent(inout) :: iter_data
        real(dp), dimension(lenof_sign), intent(in) :: tot_parts_new_all

        ! Zero all of the variables which accumulate for each iteration.
        SumWalkersCyc(:) = 0.0_dp
        SpawnFromSing = 0.0_dp
        ENumCyc = 0.0_dp
        InitsENumCyc = 0.0_dp
        ENumCycAbs = 0.0_dp
        HFCyc = 0.0_dp
        cyc_proje_denominator = 0.0_dp

        ! also reset the real-time specific quantities:
        ! and maybe have to call this routine twice to rezero also the
        ! inputted iter_data for both RK steps..
        ! Reset TotWalkersOld so that it is the number of walkers now
        TotWalkersOld = TotWalkers
        TotPartsOld = TotParts

        ! Save the number at HF to use in the HFShift
        OldAllNoatHF = AllNoatHF
        !OldAllHFCyc is the average HF value for this update cycle
        OldAllHFCyc = AllHFCyc / real(StepsSft, dp)
        !OldAllAvWalkersCyc gives the average number of walkers per iteration in the last update cycle
        !TODO CMO: are these summed across real/complex?
        OldAllAvWalkersCyc = AllSumWalkersCyc / real(StepsSft, dp)

        ! Also the cumulative global variables
        AllTotWalkersOld = AllTotWalkers
        AllTotPartsOld = AllTotParts
        AllNoAbortedOld = AllNoAborted

        ! For the init-shift
        OldAllNoInitWalk = AllNoInitWalk

        if (tNonInitModShift) then
            ! For the mod-shift-shift
            OldAllNoModShiftWalk = AllNoModShiftWalk
            OldAllNoNonModShiftWalk = AllNoNonModShiftWalk
        end if
        if (tBosonNoSpawn) then
            ! For the bosonized no-spawn
            OldAllNoBosonActiveWalk = AllNoBosonActiveWalk
        end if

        ! also reset the real-time specific quantities:
        ! and maybe have to call this routine twice to rezero also the
        ! inputted iter_data for both RK steps..
        iter_data_fciqmc%update_growth = 0.0_dp
        iter_data_fciqmc%update_iters = 0

        ! and the norm
        old_norm_psi = norm_psi

        ! Reset the counters
        iter_data%update_growth = 0.0_dp
        iter_data%update_iters = 0
        iter_data%tot_parts_old = tot_parts_new_all

        cont_spawn_attempts = 0
        cont_spawn_success = 0
        tfirst_cycle = .false.
        if (t_calc_double_occ) then
            call rezero_double_occ_stats()
            if (t_spin_measurements) then
                call rezero_spin_diff()
            end if
        end if

        if (t_measure_local_spin) then
            call rezero_local_spin_stats()
        end if

    end subroutine rezero_iter_stats_update_cycle

    subroutine iteration_output_wrapper(iter_data, tot_parts_new, &
                                        replica_pairs, t_comm_req)
        type(fcimc_iter_data), intent(inout) :: iter_data
        real(dp), dimension(lenof_sign), intent(in) :: tot_parts_new
        real(dp), dimension(lenof_sign) :: tot_parts_new_all
        logical, intent(in) :: replica_pairs
        logical, intent(in), optional :: t_comm_req
        logical :: t_do_comm

        ! The comm can be switched off
        if (present(t_comm_req)) then
            t_do_comm = t_comm_req
        else
            t_do_comm = .true.
        end if

        if (t_do_comm) &
            call communicate_estimates(iter_data, tot_parts_new, tot_parts_new_all, .true.)
        if (tPrintDataTables) &
            call write_to_stats()

    contains
        subroutine write_to_stats()
            ! Write the current output cycle's stats to the FCIMCStats/fciqmc_stats output file
            ! + stdout

            ! adjust the trial numerator for output (add in the offset)
            if (tTrialWavefunction) then
                tot_trial_numerator = relative_trial_numerator( &
                                      tot_trial_numerator, tot_trial_denom, replica_pairs)
                if (tTruncInitiator) &
                    tot_init_trial_numerator = relative_trial_numerator( &
                                               tot_init_trial_numerator, tot_init_trial_denom, replica_pairs)
            end if
            call output_diagnostics()

            if (tFCIMCStats2) then
                call write_fcimcstats2(iter_data_fciqmc)
            else
                call WriteFCIMCStats()
            end if
            ! reset accumulated output variables
            call rezero_output_stats()
        end subroutine write_to_stats
    end subroutine iteration_output_wrapper

    subroutine calculate_new_shift_wrapper(iter_data, tot_parts_new, replica_pairs, t_comm_req)

        type(fcimc_iter_data), intent(inout) :: iter_data
        real(dp), dimension(lenof_sign), intent(in) :: tot_parts_new
        real(dp), dimension(lenof_sign) :: tot_parts_new_all
        logical, intent(in) :: replica_pairs
        ! optional argument: if false is passed, do not do the shift update and diagnostics,
        !                    only produce output
        logical, intent(in), optional :: t_comm_req
        logical :: t_do_comm

        ! TODO: use def_default once available
        if (present(t_comm_req)) then
            t_do_comm = t_comm_req
        else
            t_do_comm = .true.
        end if

        ! communication of trial wf properties is only done for output steps
        if (t_do_comm) &
            call communicate_estimates(iter_data, tot_parts_new, tot_parts_new_all, tCoupleCycleOutput)

        ! update the shift and rezero the cycle data
        call shift_update()
        call rezero_iter_stats_update_cycle(iter_data, tot_parts_new_all)

    contains

        subroutine shift_update()
            ! This is what defines the update of the shift
            call collate_iter_data(iter_data)
            call iter_diagnostics()
            if (tRestart) return
            call population_check()
            call update_shift(iter_data, replica_pairs)
        end subroutine shift_update

    end subroutine calculate_new_shift_wrapper

    subroutine update_iter_data(iter_data)

        type(fcimc_iter_data), intent(inout) :: iter_data

        iter_data%update_growth = iter_data%update_growth + iter_data%nborn &
                                  - iter_data%ndied - iter_data%nannihil &
                                  - iter_data%naborted - iter_data%nremoved
        iter_data%update_iters = iter_data%update_iters + 1

    end subroutine update_iter_data

    !Fix the overlap with trial wavefunction by enforcing the value of a random determinant of the trial space
    !As long as the shift equals the trial energy, this should still give the right dynamics.
    subroutine fix_trial_overlap(iter_data)
        use util_mod, only: binary_search_first_ge
        type(fcimc_iter_data), intent(inout) :: iter_data

        HElement_t(dp), dimension(inum_runs) :: new_trial_denom, new_tot_trial_denom
        real(dp), dimension(lenof_sign) :: trial_delta, SignCurr, newSignCurr
        integer :: j, rand, det_idx, proc_idx, run, part_type, lbnd, ubnd, err
        integer :: trial_count, trial_indices(tot_trial_space_size)
        real(dp) :: amps(tot_trial_space_size), total_amp, total_amps(nProcessors)
        logical :: tIsStateDeterm

#ifdef CMPLX_
        unused_var(iter_data)
        call stop_all("fix_trial_overlap", "Complex wavefunction is not supported yet!")
#else

        !Calculate the new overlap
        new_trial_denom = 0.0
        new_tot_trial_denom = 0.0

        trial_count = 0
        total_amp = 0.0
        do j = 1, int(TotWalkers)
            call extract_sign(CurrentDets(:, j), SignCurr)
            if (.not. IsUnoccDet(SignCurr) .and. test_flag(CurrentDets(:, j), flag_trial)) then
                trial_count = trial_count + 1
                trial_indices(trial_count) = j
                amps(trial_count) = abs(current_trial_amps(1, j))
                total_amp = total_amp + amps(trial_count)
                !Update the overlap
                if (ntrial_excits == 1) then
                    new_trial_denom = new_trial_denom + current_trial_amps(1, j) * SignCurr
                else if (tReplicaReferencesDiffer .and. tPairedReplicas) then
                    do run = 2, inum_runs, 2
        new_trial_denom(run - 1:run) = new_trial_denom(run - 1:run) + current_trial_amps(run / 2, j) * SignCurr(run - 1:run)
                    end do
                else if (ntrial_excits == lenof_sign) then
                    new_trial_denom = new_trial_denom + current_trial_amps(:, j) * SignCurr
                end if
            end if
        end do

        !Collecte overlaps from call processors
        call MPIAllReduce(new_trial_denom, MPI_SUM, new_tot_trial_denom)

        !Choose a random processor propotioanl to the sum of amplitudes of its trial space
        call MPIGather(total_amp, total_amps, err)
        if (iProcIndex == root) then
            !write(stdout,*) "total_amps: ", total_amps
            do j = 2, nProcessors
                total_amps(j) = total_amps(j) + total_amps(j - 1)
            end do
            proc_idx = binary_search_first_ge(total_amps, genrand_real2_dSFMT() * total_amps(nProcessors)) - 1
        end if
        call MPIBCast(proc_idx)

        !write(stdout,*) "proc_idx", proc_idx
        !write(stdout,*) "total_count: ", trial_count
        !write(stdout,*) "amps: ", amps(1:trial_count)
        !Enforcing an update of the random determinant of the random processor
        if (iProcIndex == proc_idx) then
            !Choose a random determinant
            do j = 2, trial_count
                amps(j) = amps(j) + amps(j - 1)
            end do
            det_idx = trial_indices(binary_search_first_ge(amps(1:trial_count), genrand_real2_dSFMT() * amps(trial_count)))
            do part_type = 1, lenof_sign
                run = part_type_to_run(part_type)
                if (tFixTrial(run)) then
         trial_delta(part_type) = (tot_trial_denom(run) - new_tot_trial_denom(run)) / current_trial_amps(part_type, det_idx)
                else
                    trial_delta(part_type) = 0.0
                end if
            end do

            call extract_sign(CurrentDets(:, det_idx), SignCurr)
            newSignCurr = SignCurr + trial_delta
            call encode_sign(CurrentDets(:, det_idx), newSignCurr)

            !Correct statistics filled by CalcHashTableStats
            iter_data%ndied = iter_data%ndied + abs(SignCurr)
            iter_data%nborn = iter_data%nborn + abs(newSignCurr)
            TotParts = TotParts + abs(newSignCurr) - abs(SignCurr)

            tIsStateDeterm = .False.
            if (tSemiStochastic) tIsStateDeterm = test_flag_multi(CurrentDets(:, det_idx), flag_deterministic)

            norm_psi_squared = norm_psi_squared + (newSignCurr)**2 - SignCurr**2
            if (tIsStateDeterm) norm_semistoch_squared = norm_semistoch_squared + (newSignCurr)**2 - SignCurr**2

            if (tCheckHighestPop) then
                do run = 1, inum_runs
                    lbnd = min_part_type(run)
                    ubnd = max_part_type(run)
                    if (abs_sign(newSignCurr(lbnd:ubnd)) > iHighestPop(run)) then
                        iHighestPop(run) = int(abs_sign(newSignCurr(lbnd:ubnd)))
                        HighestPopDet(:, run) = CurrentDets(:, det_idx)
                    end if
                end do
            end if
            if (tFillingStochRDMonFly) then
                if (IsUnoccDet(newSignCurr) .and. (.not. tIsStateDeterm)) then
                    if (DetBitEQ(CurrentDets(:, det_idx), iLutHF_True, nifd)) then
                        AvNoAtHF = 0.0_dp
                        IterRDM_HF = Iter + 1
                    end if
                end if
            end if

            if (DetBitEQ(CurrentDets(:, det_idx), iLutHF_True, nifd)) then
                InstNoAtHF = newSignCurr
            end if
        end if
#endif
    end subroutine fix_trial_overlap

end module fcimc_iter_utils