#include "macros.h" module orthogonalise use FciMCData, only: TotWalkers, CurrentDets, all_norm_psi_squared, & NoBorn, NoDied, fcimc_iter_data, replica_overlaps_real, & all_overlaps, all_norms, & #ifdef CMPLX_ replica_overlaps_imag, & #endif HolesInList, iter use CalcData, only: OccupiedThresh, tOrthogonaliseSymmetric, tSemiStochastic, & tPairedReplicas, t_test_overlap, overlap_eps, n_stop_ortho use dSFMT_interface, only: genrand_real2_dSFMT use load_balance, only: CalcHashTableStats use bit_rep_data, only: extract_sign use bit_reps, only: encode_sign use semi_stoch_procs, only: check_determ_flag use Parallel_neci use constants use util_mod use basic_float_math, only: is_nan implicit none contains subroutine orthogonalise_replicas(iter_data) ! Apply a Gram Schmidt orthogonalisation to the different system ! replicas. ! ! |psi_2'> = |psi_2> - (|psi_1><psi_1|psi_2>)/(<psi_1|psi_1>) type(fcimc_iter_data), intent(inout) :: iter_data integer :: tgt_run, src_run, run, j, TotWalkersNew integer :: tgt_sgn, src_sgn real(dp) :: all_overlaps_real(inum_runs, inum_runs) integer :: low_loop_bound, high_loop_bound, loop_step real(dp) :: norms(inum_runs), overlaps_real(inum_runs, inum_runs) real(dp) :: sgn(lenof_sign), sgn_orig, delta_real, r #ifdef CMPLX_ ! the min_part_type and max_part_type macros return the index of the sgns ! corresponding to the real and imag parts of a replica respectively real(dp) :: overlaps_imag(inum_runs, inum_runs), all_overlaps_imag(inum_runs, inum_runs) real(dp) :: delta_imag #endif logical :: tCoreDet character(*), parameter :: this_routine = 'orthogonalise_replicas' #ifndef PROG_NUMRUNS_ call stop_all(this_routine, "orthogonalise replicas requires mneci or kmneci") #endif ! If we are using the symmetric orthogonaliser, then bypass this ! routine... if (tOrthogonaliseSymmetric) then call orthogonalise_replicas_lowdin(iter_data) return end if norms = 0.0_dp overlaps_real = 0.0_dp #ifdef CMPLX_ overlaps_imag = 0.0_dp #endif do tgt_run = 1, inum_runs HolesInList = 0 do j = 1, int(TotWalkers) ! n.b. We are using a non-contiguous list (Hash algorithm) call extract_sign(CurrentDets(:, j), sgn) tCoreDet = check_determ_flag(CurrentDets(:, j)) if (IsUnoccDet(sgn) .and. (.not. tCoreDet)) then HolesInList = HolesInList + 1 cycle end if ! Loop over source runs, and subtract out components sgn_orig = sgn(tgt_run) ! set loop parameters if (tPairedReplicas) then low_loop_bound = 2 - mod(tgt_run, 2) high_loop_bound = tgt_run - 2 loop_step = 2 else low_loop_bound = 1 high_loop_bound = tgt_run - 1 loop_step = 1 end if do src_run = low_loop_bound, high_loop_bound, loop_step #ifdef CMPLX_ ! (a + ib)*(c + id) = ac - bd + i(ad +bc) delta_real = -(sgn(min_part_type(src_run)) & * all_overlaps_real(src_run, tgt_run) & - sgn(max_part_type(src_run)) & * all_overlaps_imag(src_run, tgt_run)) & / all_norms(src_run) sgn(min_part_type(tgt_run)) = sgn(min_part_type(tgt_run)) + delta_real delta_imag = -(sgn(min_part_type(src_run)) & * all_overlaps_imag(src_run, tgt_run) & + sgn(max_part_type(src_run)) & * all_overlaps_real(src_run, tgt_run)) & / all_norms(src_run) sgn(max_part_type(tgt_run)) = sgn(max_part_type(tgt_run)) + delta_imag #else if (.not. near_zero(all_norms(src_run))) then delta_real = -sgn(src_run) * all_overlaps_real(src_run, tgt_run) & / all_norms(src_run) end if ! test if a small remaining overlap still gives the ! correct shift.. if (t_test_overlap .and. tgt_run == inum_runs) then ! draw a random sign and change by the ! specified epsilon ! or choose just a random number out of [-eps,+eps] ! or try a prefactor.. delta_real = overlap_eps * delta_real if (n_stop_ortho > 0 .and. iter > n_stop_ortho) then delta_real = 0.0_dp end if end if sgn(tgt_run) = sgn(tgt_run) + delta_real #endif end do call encode_sign(CurrentDets(:, j), sgn) ! Note that we shouldn't be able to kill all particles on a ! site, as we can only change a run if there are particles in ! a lower indexed run to change... ! The exception is when using semi-stochastic, where ! unoccupied determinants can be stored. if (.not. tSemiStochastic) then ASSERT(.not. IsUnoccDet(sgn)) end if ! Now we need to our accounting to make sure that NoBorn/ ! Died/etc. counters remain reasonable. ! ! n.b. we don't worry about the delta=0 case, as adding 0 to ! the accumulators doesn't cause errors... if (sgn(tgt_run) >= 0 .neqv. sgn_orig >= 0) then NoDied(tgt_run) = NoDied(tgt_run) + abs(sgn_orig) iter_data%ndied(tgt_run) = iter_data%ndied(tgt_run) & + abs(sgn_orig) NoBorn(tgt_run) = NoBorn(tgt_run) + abs(sgn(tgt_run)) iter_data%nborn(tgt_run) = iter_data%nborn(tgt_run) & + abs(sgn(tgt_run)) else if (abs(sgn(tgt_run)) >= abs(sgn_orig)) then NoBorn(tgt_run) = NoBorn(tgt_run) & + abs(sgn(tgt_run) - sgn_orig) iter_data%nborn(tgt_run) = iter_data%nborn(tgt_run) & + abs(sgn(tgt_run) - sgn_orig) else NoDied(tgt_run) = NoDied(tgt_run) & + abs(sgn(tgt_run) - sgn_orig) iter_data%ndied(tgt_run) = iter_data%ndied(tgt_run) & + abs(sgn(tgt_run) - sgn_orig) end if ! We should be accumulating the norm for this run, so it can ! be used in processing later runs. We should also be ! accumulating the overlap terms for doing the same ! ! n.b. These are the values _after_ orthogonalisation with ! the previous runs #ifdef CMPLX_ ! (a+ib)*(a-ib) = a^2 + b^2 norms(tgt_run) = norms(tgt_run) & + sgn(min_part_type(tgt_run))**2 + sgn(max_part_type(tgt_run))**2 #else norms(tgt_run) = norms(tgt_run) + sgn(tgt_run)**2 #endif do run = tgt_run + 1, inum_runs #ifdef CMPLX_ ! we want the inner product of the vectors: ! ___ ! <n| = \ ` [ a(n,r) - ib(n,r) ] <r| ! /__, ! r ! ___ ! |m> = \ ` [ a(n,r) + ib(n,r) ] |r> ! /__, ! r ! ___ ! <n|m> = \ ` a(n,r)*a(m,r) + b(n,r)*b(m,r) ! /__, + i [ a(n,r)*b(m,r) - b(n,r)*a(m,r) ] ! r ! ! Note that symmetry may no longer be assumed for the overlap matrix ! i.e. overlaps_real(n,m) = overlaps_real(m,n), but ! overlaps_imag(n,m) = -overlaps_imag(m,n) ! only storing overlaps for run(m) > tgt_run(n): ! <n|m> = ! _ _ ! | | ! | x | ! | x x | ! | x x x | ! | x x x x | ! | x x x x x | ! |_x x x x x x _| overlaps_real(tgt_run, run) = overlaps_real(tgt_run, run) & + sgn(min_part_type(run)) * sgn(min_part_type(tgt_run)) & + sgn(max_part_type(run)) * sgn(max_part_type(tgt_run)) overlaps_imag(tgt_run, run) = overlaps_imag(tgt_run, run) & + sgn(min_part_type(run)) * sgn(max_part_type(tgt_run)) & + sgn(max_part_type(run)) * sgn(min_part_type(tgt_run)) overlaps_real(run, tgt_run) = overlaps_real(tgt_run, run) overlaps_imag(run, tgt_run) = -overlaps_imag(tgt_run, run) #else overlaps_real(tgt_run, run) = overlaps_real(tgt_run, run) & + sgn(run) * sgn(tgt_run) overlaps_real(run, tgt_run) = overlaps_real(tgt_run, run) #endif end do end do ! And ensure that the norm/overlap data is accumulated onto all ! of the processors. call MPISumAll(norms, all_norms) call MPISumAll(overlaps_real, all_overlaps_real) #ifdef CMPLX_ call MPISumAll(overlaps_imag, all_overlaps_imag) #endif end do ! Store a normalised overlap matrix for each of the replicas. do src_run = 1, inum_runs - 1 do tgt_run = src_run + 1, inum_runs if (all_norms(src_run) * all_norms(tgt_run) > EPS) then replica_overlaps_real(src_run, tgt_run) = & all_overlaps_real(src_run, tgt_run) / & sqrt(all_norms(src_run) * all_norms(tgt_run)) #ifdef CMPLX_ replica_overlaps_imag(src_run, tgt_run) = & all_overlaps_imag(src_run, tgt_run) / & sqrt(all_norms(src_run) * all_norms(tgt_run)) #endif end if end do end do ! Store in global data for perturbation accumulation. all_overlaps = all_overlaps_real ! We now need to aggregate statistics here, rather than at the end ! of annihilation, as they have been modified by this routine... ! ! n.b. TotWalkersNew is integer, TotWalkers is int64 to ensure that it ! can be collected using AllTotWalkers (which may end up with far ! too many walkers). TotWalkersNew = int(TotWalkers) call CalcHashTableStats(TotWalkersNew, iter_data) TotWalkers = TotWalkersNew end subroutine orthogonalise_replicas subroutine orthogonalise_replica_pairs(iter_data) ! complex walkers not supported here. This routine is never called: deprecated code? ! Apply a Gram Schmidt orthogonalisation to the different system ! replicas. ! This routine works in cases where there are two replicas representing ! each excited state. Whenever an overlap has to be calculated between ! different excited states, both pairs are used and averaged. When a ! norm needs to be calculated, the overlap is taken between the two ! replicas for the same excited state. ! Underscores denote the excited state label and uperscores represent ! the replica label (i.e. 1 or 2): ! |psi_2^1'> = |psi_2^1> - (|psi_1^1><psi_1^2|psi_2^1> + ! |psi_1^2><psi_1^1|psi_2^1>)/(2<psi_1^1|psi_1^2>) type(fcimc_iter_data), intent(inout) :: iter_data integer :: tgt_state, src_state, run, j, irep, imod1, imod2, TotWalkersNew real(dp) :: norms(inum_runs.div.2), overlaps(inum_runs, inum_runs) real(dp) :: sgn(lenof_sign), sgn_orig(2), delta, r logical :: tCoreDet character(len=*), parameter :: this_routine = "orthogonalise_replica_pairs" ASSERT(inum_runs == lenof_sign) #ifndef PROG_NUMRUNS_ unused_var(iter_data) call stop_all(this_routine, "orthogonalise replicas requires mneci.x") #else norms = 0.0_dp overlaps = 0.0_dp ! Loop over all excited states (thus over all replica *pairs*). do tgt_state = 1, inum_runs / 2 HolesInList = 0 do j = 1, int(TotWalkers) ! n.b. We are using a non-contiguous list (Hash algorithm). call extract_sign(CurrentDets(:, j), sgn) tCoreDet = check_determ_flag(CurrentDets(:, j)) if (IsUnoccDet(sgn) .and. (.not. tCoreDet)) then HolesInList = HolesInList + 1 cycle end if if (tgt_state > 1) then ! The two replica signs for this excited state. sgn_orig(1) = sgn(tgt_state * 2 - 1) sgn_orig(2) = sgn(tgt_state * 2) ! Loop over both replicas for this excited state. do irep = 1, 2 ! These variables equal (1,2) and (2,1) when irep = 1 and ! 2, repsectively. They are used to access the correct ! indices of the sign arrays. imod1 = mod(irep, 2) imod2 = 1 - imod1 do src_state = 1, tgt_state - 1 delta = -sgn(src_state * 2 - imod1) * & all_overlaps(src_state * 2 - imod2, tgt_state * 2 - imod1) / & (2 * all_norms(src_state)) sgn(tgt_state * 2 - imod1) = sgn(tgt_state * 2 - imod1) + delta delta = -sgn(src_state * 2 - imod2) * & all_overlaps(src_state * 2 - imod1, tgt_state * 2 - imod1) / & (2 * all_norms(src_state)) sgn(tgt_state * 2 - imod1) = sgn(tgt_state * 2 - imod1) + delta end do end do call encode_sign(CurrentDets(:, j), sgn) ! Note that we shouldn't be able to kill all particles on a ! site, as we can only change a run if there are particles in ! a lower indexed run to change... ! The exception is when using semi-stochastic, where ! unoccupied determinants can be stored. if (.not. tSemiStochastic) then ASSERT(.not. IsUnoccDet(sgn)) end if ! Now we need to our accounting to make sure that NoBorn/ ! Died/etc. counters remain reasonable. ! ! n.b. we don't worry about the delta=0 case, as adding 0 to ! the accumulators doesn't cause errors... do irep = 1, 2 run = tgt_state * 2 - mod(irep, 2) if (sgn(run) >= 0 .neqv. sgn_orig(irep) >= 0) then NoDied(run) = NoDied(run) + abs(sgn_orig(irep)) iter_data%ndied(run) = iter_data%ndied(run) & + abs(sgn_orig(irep)) NoBorn(run) = NoBorn(run) + abs(sgn(run)) iter_data%nborn(run) = iter_data%nborn(run) & + abs(sgn(run)) else if (abs(sgn(run)) >= abs(sgn_orig(irep))) then NoBorn(run) = NoBorn(run) & + abs(sgn(run) - sgn_orig(irep)) iter_data%nborn(run) = iter_data%nborn(run) & + abs(sgn(run) - sgn_orig(irep)) else NoDied(run) = NoDied(run) & + abs(sgn(run) - sgn_orig(irep)) iter_data%ndied(run) = iter_data%ndied(run) & + abs(sgn(run) - sgn_orig(irep)) end if end do end if ! We should be accumulating the norm for this run, so it can ! be used in processing later runs. We should also be ! accumulating the overlap terms for doing the same ! ! n.b. These are the values _after_ orthogonalisation with ! the previous runs norms(tgt_state) = norms(tgt_state) + sgn(2 * tgt_state - 1) * sgn(2 * tgt_state) ! Calculate overlaps for all 4 combinations of the 2 replicas ! of the 2 excited states. do run = tgt_state + 1, inum_runs / 2 overlaps(tgt_state * 2 - 1, run * 2 - 1) = overlaps(tgt_state * 2 - 1, run * 2 - 1) & + sgn(tgt_state * 2 - 1) * sgn(run * 2 - 1) overlaps(tgt_state * 2 - 1, run * 2) = overlaps(tgt_state * 2 - 1, run * 2) & + sgn(tgt_state * 2 - 1) * sgn(run * 2) overlaps(tgt_state * 2, run * 2 - 1) = overlaps(tgt_state * 2, run * 2 - 1) & + sgn(tgt_state * 2) * sgn(run * 2 - 1) overlaps(tgt_state * 2, run * 2) = overlaps(tgt_state * 2, run * 2) & + sgn(tgt_state * 2) * sgn(run * 2) end do end do ! And ensure that the norm/overlap data is accumulated onto all ! of the processors. call MPISumAll(norms, all_norms) call MPISumAll(overlaps, all_overlaps) end do ! We now need to aggregate statistics here, rather than at the end ! of annihilation, as they have been modified by this routine... ! ! n.b. TotWalkersNew is integer, TotWalkers is int64 to ensure that it ! can be collected using AllTotWalkers (which may end up with far ! too many walkers). TotWalkersNew = int(TotWalkers) call CalcHashTableStats(TotWalkersNew, iter_data) TotWalkers = TotWalkersNew #endif end subroutine orthogonalise_replica_pairs subroutine orthogonalise_replicas_2runs(iter_data) ! complex walkers not supported here. This routine is never called: deprecated code? ! Apply a Gram Schmidt orthogonalisation to the different system ! replicas. ! ! |psi_2'> = |psi_2> - (|psi_1><psi_1|psi_2>)/(<psi_1|psi_1>) integer :: j, TotWalkersNew real(dp) :: sgn(lenof_sign), delta, scal_prod, all_scal_prod, r real(dp) :: sgn_orig real(dp) :: psi_squared(lenof_sign), all_psi_squared(lenof_sign) type(fcimc_iter_data), intent(inout) :: iter_data character(*), parameter :: this_routine = 'orthogonalise_replicas_2runs' ASSERT(inum_runs == 2) ASSERT(lenof_sign == 2) #ifndef PROG_NUMRUNS_ call stop_all(this_routine, "orthogonalise replicas requires mneci.x") #else ! We need the norm of the wavefunction to do anything. Don't trust ! the global values here, as they aren't valid until after the ! communication --> not set yet. ! ! We want them to be valid for the new psi... psi_squared = 0 scal_prod = 0 do j = 1, int(TotWalkers) call extract_sign(CurrentDets(:, j), sgn) if (IsUnoccDet(sgn)) cycle psi_squared = psi_squared + sgn**2 scal_prod = scal_prod + sgn(1) * sgn(2) end do call MPISumAll(psi_squared, all_psi_squared) call MPISumAll(scal_prod, all_scal_prod) ! Calculate the change do j = 1, int(TotWalkers) ! Adjust the wavefunctions call extract_sign(CurrentDets(:, j), sgn) if (IsUnoccDet(sgn)) cycle delta = -sgn(1) * all_scal_prod / all_psi_squared(1) sgn_orig = sgn(2) sgn(2) = sgn(2) + delta ! And stochastically round, so that the minimum particle sign ! is maintained in an unbiased way. if (abs(sgn(2)) < 1.0_dp) then r = genrand_real2_dSFMT() if (r > abs(sgn(2))) then sgn(2) = sign(1.0_dp, sgn(2)) else sgn(2) = 0.0_dp end if end if call encode_sign(CurrentDets(:, j), sgn) ! Note that we shouldn't be able to kill all particles on a site, ! as we can only change run 2 if run 1 is occupied, and run 1 ! doesn't change... ! The exception is when using semi-stochastic, where ! unoccupied determinants can be stored. if (.not. tSemiStochastic) then ASSERT(.not. IsUnoccDet(sgn)) end if ! Now we need to do our accounting to make sure that the NoBorn/ ! Died/etc. counters remain reasonable. ! ! n.b. we don't worry about the delta=0 case, as adding 0 to the ! accumulators doesn't cause errors. if (sgn(2) >= 0 .neqv. sgn_orig >= 0) then NoDied(2) = NoDied(2) + abs(sgn_orig) iter_data%ndied(2) = iter_data%ndied(2) + abs(sgn_orig) NoBorn(2) = NoBorn(2) + abs(sgn(2)) iter_data%nborn(2) = iter_data%nborn(2) + abs(sgn(2)) else if (abs(sgn(2)) >= abs(sgn_orig)) then NoBorn(2) = NoBorn(2) + abs(sgn(2) - sgn_orig) iter_data%nborn(2) = iter_data%nborn(2) + abs(sgn(2) - sgn_orig) else NoDied(2) = NoDied(2) + abs(sgn(2) - sgn_orig) iter_data%ndied(2) = iter_data%ndied(2) + abs(sgn(2) - sgn_orig) end if end do #endif ! We now need to aggregate statistics here, rather than at the end ! of annihilation, as they have been modified by this routine... ! ! n.b. TotWalkersNew is integer, TotWalkers is int64 to ensure that it ! can be collected using AllTotWalkers (which may end up with far ! to many walkers). TotWalkersNew = int(TotWalkers) call CalcHashTableStats(TotWalkersNew, iter_data) TotWalkers = TotWalkersNew end subroutine subroutine write_mat(mat) real(dp) :: mat(:, :) integer :: i, j do i = lbound(mat, 1), ubound(mat, 1) do j = lbound(mat, 2), ubound(mat, 2) write(stdout, '(f17.9, " ")', advance='no') mat(i, j) end do write(stdout, *) end do end subroutine subroutine orthogonalise_replicas_lowdin(iter_data) ! Perform a symmetric (Lowdin) orthogonalisation type(fcimc_iter_data), intent(inout) :: iter_data character(*), parameter :: this_routine = 'orthogonalise_replicas_lowdin' real(dp) :: S(inum_runs, inum_runs), S_all(inum_runs, inum_runs) real(dp) :: evecs(inum_runs, inum_runs), evecs_t(inum_runs, inum_runs) real(dp) :: S_half(inum_runs, inum_runs) real(dp) :: elem, sgn(lenof_sign), sgn_orig(lenof_sign) real(dp) :: work(3 * inum_runs - 1), evals(inum_runs) real(dp) :: sgn_orig_norm(lenof_sign), sgn_norm(lenof_sign) real(dp) :: norm(inum_runs) integer :: j, run, runa, runb, info, TotWalkersNew logical :: tCoreDet ! Not implemented for complex (yet) ASSERT(inum_runs == lenof_sign) #ifndef PROG_NUMRUNS_ call stop_all(this_routine, "orthogonalise replicas requires mneci.x") #else ! Generate the overlap matrix (unnormalised) S = 0 do j = 1, int(TotWalkers) call extract_sign(CurrentDets(:, j), sgn) if (IsUnoccDet(sgn)) cycle do runa = 1, inum_runs do runb = runa, inum_runs elem = sgn(runa) * sgn(runb) S(runa, runb) = S(runa, runb) + elem if (runa /= runb) & S(runb, runa) = S(runb, runa) + elem end do end do end do call MPISumAll(S, S_all) ! Normalise everything (the diagonal terms give the normalisation ! constants) S = S_all do run = 1, inum_runs norm(run) = sqrt(S_all(run, run)) S(run, :) = S(run, :) / norm(run) S(:, run) = S(:, run) / norm(run) end do evecs = S ! Diagonalise the S matrix call dsyev('V', 'U', inum_runs, evecs, inum_runs, evals, work, & size(work), info) if (any(evals < 0)) then write(stdout, *) '*** WARNING ***' write(stdout, *) "Not orthogonalising this iteration." write(stdout, *) 'Negative eigenvalue of overlap matrix found' return end if ! Take the square roots of the eigenvalues evals = 1.0_dp / sqrt(evals) evecs_t = transpose(evecs) ! Multiply through by the square root, and obtain S^{-0.5} do j = 1, inum_runs evecs_t(j, :) = evecs_t(j, :) * evals(j) end do S_half = matmul(evecs, evecs_t) if (any(is_nan(s_half))) call stop_all(this_routine, "NaNs found") ! Go through and update the values! HolesInList = 0 do j = 1, int(TotWalkers) ! n.b. We are using a non-contiguous list (Hash algorith) call extract_sign(CurrentDets(:, j), sgn_orig) tCoreDet = check_determ_flag(CurrentDets(:, j)) if (IsUnoccDet(sgn_orig) .and. (.not. tCoreDet)) then HolesInList = HolesInList + 1 cycle end if ! Ultimately, we want to use normalised signs for the Lowdin ! expressions sgn_orig_norm = sgn_orig / norm ! Obtain the new sign values sgn_norm = matmul(S_half, sgn_orig_norm) sgn = sgn_norm * norm call encode_sign(CurrentDets(:, j), sgn) ! We should not be able to kill all particles on a site. This is ! a rotation. The exception is when using semi-stochastic, where ! unoccupied determinants can be stored. if (.not. tSemiStochastic) then ASSERT(.not. IsUnoccDet(sgn)) end if ! Do some particle accounting do run = 1, inum_runs if (sgn(run) >= 0 .neqv. sgn_orig(run) >= 0) then NoDied(run) = NoDied(run) + abs(sgn_orig(run)) iter_data%ndied(run) = iter_data%ndied(run) & + abs(sgn_orig(run)) NoBorn(run) = NoBorn(run) + abs(sgn(run)) iter_data%nborn(run) = iter_data%nborn(run) + abs(sgn(run)) else if (abs(sgn(run)) >= abs(sgn_orig(run))) then NoBorn(run) = NoBorn(run) + abs(sgn(run) - sgn_orig(run)) iter_data%nborn(run) = iter_data%nborn(run) & + abs(sgn(run) - sgn_orig(run)) else NoDied(run) = NoDied(run) + abs(sgn(run) - sgn_orig(run)) iter_data%ndied(run) = iter_data%ndied(run) & + abs(sgn(run) - sgn_orig(run)) end if end do end do #endif ! We now need to aggregate statistics here, rather than at the end ! of annihilation, as they have been modified by this routine... ! ! n.b. TotWalkersNew is integer, TotWalkers is int64 to ensure that it ! can be collected using AllTotWalkers (which may end up with far ! to many walkers). TotWalkersNew = int(TotWalkers) call CalcHashTableStats(TotWalkersNew, iter_data) TotWalkers = TotWalkersNew end subroutine subroutine calc_replica_overlaps() ! A routine for just calculating the overlap, in cases where ! orthogonalisation is not being performed. integer :: j, run, tgt_run, src_run real(dp) :: sgn(lenof_sign) real(dp) :: norms(inum_runs), overlaps(inum_runs, inum_runs) character(*), parameter :: this_routine = 'calc_replica_overlaps' norms = 0.0_dp overlaps = 0.0_dp do j = 1, int(TotWalkers) ! n.b. We are using a non-contiguous list (Hash algorithm) call extract_sign(CurrentDets(:, j), sgn) if (IsUnoccDet(sgn)) cycle #ifndef CMPLX_ norms = norms + sgn * sgn #endif do tgt_run = 1, inum_runs do run = tgt_run + 1, inum_runs overlaps(tgt_run, run) = overlaps(tgt_run, run) & + sgn(tgt_run) * sgn(run) overlaps(run, tgt_run) = 99999999.0_dp ! invalid end do end do end do ! And ensure that the norm/overlap data is accumulated onto all ! of the processors. call MPISumAll(norms, all_norms) call MPISumAll(overlaps, all_overlaps) ! Store a normalised overlap matrix for each of the replicas. do src_run = 1, inum_runs - 1 do tgt_run = src_run + 1, inum_runs if (all_norms(src_run) * all_norms(tgt_run) > EPS) then replica_overlaps_real(src_run, tgt_run) = & all_overlaps(src_run, tgt_run) / & sqrt(all_norms(src_run) * all_norms(tgt_run)) end if replica_overlaps_real(src_run, tgt_run) = & replica_overlaps_real(src_run, tgt_run) end do end do end subroutine calc_replica_overlaps end module