#include "macros.h" module hist use SystemData, only: nbasis, nel, LMS, LMS, tHPHF, & tOddS_HPHF, G1, tGUGA use DetBitOps, only: count_open_orbs, EncodeBitDet, spatial_bit_det, & DetBitEq, count_open_orbs, TestClosedShellDet, & CalcOpenOrbs, IsAllowedHPHF, FindBitExcitLevel use load_balance_calcnodes, only: DetermineDetNode use CalcData, only: tFCIMC, tTruncInitiator use DetCalcData, only: FCIDetIndex, det use FciMCData, only: tFlippedSign, TotWalkers, CurrentDets, iter, & all_norm_psi_squared, ilutRef use HPHFRandExcitMod, only: FindExcitBitDetSym use hphf_integrals, only: hphf_sign use bit_rep_data, only: NIfTot, NIfD, extract_sign, test_flag, flag_initiator use bit_reps, only: encode_sign, extract_bit_rep, & decode_bit_det, & get_initiator_flag, writebitdet, & any_run_is_initiator use searching, only: BinSearchParts2 use DeterminantData, only: get_lexicographic, calculated_ms, write_det use util_mod, only: stop_all, binary_search_ilut, operator(.div.), & get_free_unit, choose_i64 use hist_data, only: hist_excit_tofrom, tag_hist_excit, HistogramEnergy, & HistMinInd, BinRange, excit_tofrom_unit, Histogram, InstHist, & iNoBins, tHistSpawn use constants, only: n_int, bits_n_int, size_n_int, lenof_sign, & dp, MPIArg, inum_runs, stdout use parallel_neci, only: nProcessors, MPISum, MPIBcast, MPIAlltoAll, & MPIAlltoAllv, MPIAllLORLogical, root, iProcIndex use timing_neci, only: set_timer, timer, halt_timer use MemoryManager, only: LogMemAlloc, LogMemDealloc implicit none contains subroutine ilut_nifd_pointer_assign(ptr, ilut) ! This is a useful helper function to get around some irritating ! behaviour (namely that pointer array slices are indexed to ! begin at 0, not 1 --> wrong for iluts). integer(n_int), intent(in), target :: ilut(0:NIfTot) integer(n_int), intent(out), pointer :: ptr(:) ptr => ilut end subroutine subroutine add_hist_spawn(ilut, sign, ExcitLevel, dProbFin) integer(n_int), intent(in) :: ilut(0:NIfTot) integer, intent(in) :: ExcitLevel real(dp), intent(in) :: dProbFin real(dp), intent(in) :: sign(lenof_sign) integer :: PartInd, open_orbs integer(n_int) :: ilut_sym(0:NIfTot) real(dp) :: delta(lenof_sign) logical :: tSuccess character(*), parameter :: t_r = 'add_hist_spawn' if (ExcitLevel == nel) then call BinSearchParts2(ilut, HistMinInd(ExcitLevel), det, PartInd, & tSuccess) ! CCMC doesn't sum particle contributions in order, so we must ! search the whole space again! if (tFCIMC) HistMinInd(ExcitLevel) = PartInd else if (ExcitLevel == 0) then PartInd = 1 tSuccess = .true. else call BinSearchParts2(ilut, HistMinInd(ExcitLevel), & FCIDetIndex(ExcitLevel + 1) - 1, PartInd, & tSuccess) end if if (tSuccess) then delta = sign / dProbFin if (tHPHF) then call FindExcitBitDetSym(ilut, ilut_sym) if (.not. DetBitEq(ilut, ilut_sym)) delta = delta / sqrt(2.0_dp) end if if (tFlippedSign) delta = -delta Histogram(:, PartInd) = Histogram(:, PartInd) + delta if (tHistSpawn) & InstHist(:, PartInd) = InstHist(:, PartInd) + delta ! In HPHF we also include the spin-coupled determinant, which will ! have the same amplitude as the original determinant, unless it ! is antisymmetric. if (tHPHF .and. .not. DetBitEQ(ilut, ilut_sym)) then if (ExcitLevel == nel) then call BinSearchParts2(ilut_sym, FCIDetIndex(ExcitLevel), & Det, PartInd, tSuccess) else if (ExcitLevel == 0) then PartInd = 1 tSuccess = .true. else call BinSearchParts2(ilut_sym, FCIDetIndex(ExcitLevel), & FCIDetIndex(ExcitLevel + 1) - 1, & PartInd, tSuccess) end if if (tSuccess) then delta = (sign / sqrt(2.0_dp)) / dProbFin call CalcOpenOrbs(ilut_sym, open_orbs) if ((mod(open_orbs, 2) == 1) .neqv. tOddS_HPHF) & delta = -delta if (tFlippedSign .neqv. tOddS_HPHF) delta = -delta Histogram(:, PartInd) = Histogram(:, PartInd) + delta if (tHistSpawn) & InstHist(:, PartInd) = InstHist(:, PartInd) + delta end if end if else call writebitdet(stdout, ilut, .true.) write(stdout, *) '***', ilut write(stdout, *) '***', ExcitLevel, HistMinInd(ExcitLevel), Det call stop_all(t_r, "Cannot find corresponding FCI determinant & &when histogramming") end if end subroutine subroutine find_hist_coeff_explicit(ilut, ExcitLevel, PartInd, tSuccess) implicit none integer(n_int), intent(in) :: ilut(0:NIfTot) integer, intent(in) :: ExcitLevel integer, intent(out) :: PartInd logical, intent(out) :: tSuccess tSuccess = .false. if (ExcitLevel == nel) then call BinSearchParts2(ilut, FCIDetIndex(ExcitLevel), det, PartInd, & tSuccess) else if (ExcitLevel == 0) then PartInd = 1 tSuccess = .true. else call BinSearchParts2(ilut, FCIDetIndex(ExcitLevel), & FCIDetIndex(ExcitLevel + 1) - 1, PartInd, & tSuccess) end if end subroutine subroutine add_hist_energies(ilut, Sign, HDiag) ! This will histogram the energies of the particles, rather than the ! determinants themselves. integer(n_int), intent(in) :: ilut(0:NIfTot) real(dp), dimension(lenof_sign), intent(in) :: Sign real(dp), intent(in) :: HDiag integer :: bin character(*), parameter :: t_r = "add_hist_energies" integer(n_int) :: iUnused bin = int(HDiag / BinRange) + 1 if (bin > iNoBins) & call stop_all(t_r, "Histogramming energies higher than the & &arrays can cope with. Increase iNoBins or BinRange") HistogramEnergy(bin) = HistogramEnergy(bin) + sum(abs(Sign)) ! Avoid compiler warnings iUnused = ilut(0) end subroutine function calc_s_squared(only_init) result(ssq) ! Calculate the instantaneous value of S^2 for the walkers stored ! in CurrentDets ! ! --> This could be generalised to an arbitrary list of iluts. We ! would also then need to calculate the value of psi_squared real(dp) :: ssq(inum_runs), tmp(inum_runs) integer :: i, run logical, intent(in) :: only_init type(timer), save :: s2_timer s2_timer%timer_name = 'S2 local' call set_timer(s2_timer) ssq = 0 do i = 1, int(TotWalkers) if ((test_flag(CurrentDets(:, i), get_initiator_flag(1)) .or. & test_flag(CurrentDets(:, i), get_initiator_flag(lenof_sign))) & .and. .not. TestClosedShellDet(CurrentDets(:, i))) then ssq = ssq + ssquared_contrib(CurrentDets(:, i), only_init) end if end do ! Sum over all processors and normalise call MPISum(ssq, tmp) ssq = tmp do run = 1, inum_runs if (abs(all_norm_psi_squared(run)) < 1.0e-10_dp) then ssq(run) = 0.0_dp else ssq(run) = ssq(run) / all_norm_psi_squared(run) ! TODO: n.b. This is a hack. LMS appears to contain -2*Ms of the ! system I am somewhat astounded I haven't noticed this ! before... ssq(run) = ssq(run) & + real(calculated_ms * (calculated_ms + 2), dp) / 4 end if end do call halt_timer(s2_timer) end function function calc_s_squared_multi() result(ssq) integer :: max_linked, max_per_proc, max_spawned real(dp), dimension(inum_runs) :: ssq type(timer), save :: s2_timer s2_timer%timer_name = 'S^2' call set_timer(s2_timer) max_linked = int(choose_i64(nel, (nel + LMS) / 2)) max_per_proc = 2 * (max_linked / nProcessors) + 1 max_spawned = max_per_proc * nProcessors ssq = calc_s_squared_multi_worker(max_per_proc, max_spawned) call halt_timer(s2_timer) end function function calc_s_squared_multi_worker(max_per_proc, max_spawned) & result(ssq) integer :: i, j, k, orb2, orbtmp, pos, ierr integer :: nI(nel), nJ(nel), proc integer(n_int), pointer :: detcurr(:) integer(n_int) :: splus(0:NIfTot), sminus(0:NIfTot) logical :: running, any_running real(dp), dimension(inum_runs) :: ssq, tmp integer :: max_per_proc, max_spawned, run real(dp) :: sgn1(lenof_sign), sgn2(lenof_sign) ! Could we pre-initialise all of these data structures !integer :: max_linked = int(choose(nel, (nel + LMS)/2)) integer(n_int) :: det_list(0:NIfTot, max_spawned) integer(n_int) :: recv_dets(0:NIfTot, max_spawned) integer :: proc_pos(nProcessors), proc_pos_init(nProcessors) integer :: send_count(nProcessors), recv_count(nProcessors) integer(MPIArg) :: send_data(nProcessors), recv_data(nProcessors) integer(MPIArg) :: send_off(nProcessors), recv_off(nProcessors) running = (TotWalkers > 0) any_running = .true. j = 1 ssq = 0 forall (i=1:nProcessors) proc_pos_init(i) = (i - 1) * max_per_proc + 1 do while (any_running) ! Clear transmission lists proc_pos = proc_pos_init if (running) then ! Generate items, add to list (and use the sgn of initial ! walker, so we send it to the target processor) call ilut_nifd_pointer_assign(detcurr, CurrentDets(0:NIfTot, j)) call decode_bit_det(nI, detcurr) do i = 1, nel orbtmp = get_alpha(nI(i)) if (is_beta(nI(i)) & .and. IsNotOcc(detcurr, orbtmp)) then splus = detcurr clr_orb(splus, nI(i)) set_orb(splus, orbtmp) do k = 1, nel orb2 = nI(k) if (k == i) orb2 = get_alpha(orb2) orbtmp = get_beta(orb2) if (is_alpha(orb2) & .and. IsNotOcc(splus, orbtmp)) then sminus = splus clr_orb(sminus, orb2) set_orb(sminus, orbtmp) ! Store this det (n.b. contains original sgn) call decode_bit_det(nJ, sminus) proc = DetermineDetNode(nel, nJ, 0) + 1 det_list(:, proc_pos(proc)) = sminus proc_pos(proc) = proc_pos(proc) + 1 end if end do end if end do ! Walk through the list. Stop when we get to the end. j = j + 1 if (j > TotWalkers) running = .false. end if ! How many elements are there in each list? send_count = proc_pos - proc_pos_init if (any(send_count > max_per_proc)) & send_count = max_per_proc + 1 call MPIAlltoAll(send_count, 1, recv_count, 1, ierr) send_off = int((proc_pos_init - 1) * (NIfTot + 1), MPIArg) recv_off(1) = 0 do i = 2, nProcessors recv_off(i) = recv_off(i - 1) + int(recv_count(i - 1), MPIArg) end do recv_off = recv_off * int(NIfTot + 1, MPIArg) send_data = int(send_count * (NIfTot + 1), MPIArg) recv_data = int(recv_count * (NIfTot + 1), MPIArg) call MPIAlltoAllv(det_list, send_data, send_off, & recv_dets, recv_data, recv_off, ierr) ! Find the det in list, and sum in its term. do i = 1, sum(recv_count) ! The sign of the source particle call extract_sign(recv_dets(:, i), sgn1) ! And the generated, connected particle pos = binary_search_ilut(CurrentDets(:, 1:TotWalkers), & recv_dets(:, i), NIfD + 1) if (pos > 0) then call extract_sign(CurrentDets(:, pos), sgn2) ssq = ssq + (sgn1(1) * sgn2(1)) end if end do ! Is there anything left to do on any process? call MPIAllLORLogical(running, any_running) end do call MPISum(ssq, tmp) ssq = tmp do run = 1, inum_runs if (abs(all_norm_psi_squared(run)) < 1.0e-10_dp) then ssq(run) = 0.0_dp else ssq(run) = ssq(run) / all_norm_psi_squared(run) ! TODO: n.b. This is a hack. LMS appears to contain -2Ms of the ! system. I am somewhat astounded I haven't noticed ! this before... ssq(run) = ssq(run) & + real(calculated_ms * (calculated_ms + 2), dp) / 4 end if end do end function function calc_s_squared_star(only_init) result(ssq) real(dp), dimension(inum_runs) :: ssq integer, parameter :: max_per_proc = 1000 integer(n_int) :: recv_dets(0:NIfTot, max_per_proc) integer :: proc_dets, start_pos, nsend, i, p integer :: bcast_tmp(2), run real(dp) :: sgn_tmp(lenof_sign) type(timer), save :: s2_timer, s2_timer_init real(dp), dimension(inum_runs) :: ssq_sum, psi_squared real(dp), dimension(inum_runs):: All_ssq_sum, All_psi_squared logical, intent(in) :: only_init s2_timer%timer_name = 'S^2 star' s2_timer_init%timer_name = 'S^2 init' if (only_init) then call set_timer(s2_timer_init) else call set_timer(s2_timer) end if ssq_sum = 0.0_dp psi_squared = 0.0_dp do p = 0, nProcessors - 1 ! How many dets are on processor p proc_dets = int(TotWalkers) call MPIBcast(proc_dets, iProcIndex == p) ! Send the dets around bit by bit start_pos = 1 do while (start_pos <= proc_dets) if (tTruncInitiator .and. only_init) then ! Loop over walkers and only add initiators to bcast list nsend = 0 if (p == iProcIndex) then do i = start_pos, int(TotWalkers) ! Break up the list into correctly sized chunks if (nsend == max_per_proc) exit if (any_run_is_initiator(CurrentDets(:, i))) then nsend = nsend + 1 recv_dets(:, nsend) = CurrentDets(:, i) ! If we are using initiators only, keep track ! of the overall magnitude of the init-only ! wavefunction. call extract_sign(CurrentDets(:, i), sgn_tmp) psi_squared = psi_squared + sum(sgn_tmp**2) end if end do start_pos = i end if bcast_tmp = (/nsend, start_pos/) call MPIBcast(bcast_tmp, p == iProcIndex) nsend = bcast_tmp(1) start_pos = bcast_tmp(2) else ! How many dets to send in this batch? if (start_pos + max_per_proc - 1 > proc_dets) then nsend = proc_dets - start_pos + 1 else nsend = max_per_proc end if ! Update list of received dets if (p == iProcIndex) recv_dets(:, 1:nsend) = & CurrentDets(:, start_pos:start_pos + nsend - 1) ! Increment position start_pos = start_pos + nsend end if ! Broadcast to all processors call MPIBcast(recv_dets(:, 1:nsend), iProcIndex == p) ! All processors loop over these dets, and calculate their ! contribution to S^2 do i = 1, nsend if (.not. TestClosedShellDet(recv_dets(:, i))) then ! If nopen == 2, and tHPHF, then this can be ! optimised further ssq_sum = ssq_sum + ssquared_contrib(recv_dets(:, i), & only_init) end if end do end do end do ! Loop over processors ! Sum all of the s squared terms if (tTruncInitiator .and. only_init) then call MPISum(psi_squared, All_psi_squared) psi_squared = All_psi_squared else psi_squared = all_norm_psi_squared end if call MPISum(ssq_sum, All_ssq_sum) ssq_sum = All_ssq_sum do run = 1, inum_runs if (abs(psi_squared(run)) < 1.0e-10_dp) then ssq(run) = 0.0_dp else ssq(run) = real(ssq_sum(run), dp) / psi_squared(run) ! TODO: n.b. This is a hack. LMS appears to contain -2Ms of the ! system. I am somewhat astounded I haven't noticed ! this before... ssq(run) = ssq(run) & + real(calculated_ms * (calculated_ms + 2), dp) / 4.0 end if end do if (only_init) then call halt_timer(s2_timer_init) else call halt_timer(s2_timer) end if end function function ssquared_contrib(ilut, only_init_, n_opt, ilut_list_opt) result(ssq) ! Calculate the contribution to s-squared from the determinant ! provided (from walkers on this processor). ! ! This applies the operator S-S+, returning the result: ! ! <Psi(iProcIndex) | S-S+ | D_i> integer(n_int), intent(in) :: ilut(0:NIfTot) logical, intent(in), optional :: only_init_ integer, intent(in), optional :: n_opt integer(n_int), intent(in), optional :: ilut_list_opt(0:, :) real(dp) :: ssq #ifdef DEBUG_ character(*), parameter :: this_routine = "ssquared_contrib" #endif integer(n_int) :: splus(0:NIfD), sminus(0:NIfD) integer(n_int) :: ilut_srch(0:NIfD), ilut_sym(0:NIfD) real(dp) :: sgn(lenof_sign), sgn2(lenof_sign), sgn_hphf integer :: flg, nI(nel), j, k, orb2, pos, orb_tmp logical :: only_init, inc integer :: n_states integer(n_int), allocatable :: ilut_list(:, :) if (present(only_init_)) then only_init = only_init_ else only_init = .false. end if ! make this routine more flexible and usable not only for CurrentDets if (present(n_opt)) then ASSERT(present(ilut_list_opt)) ASSERT(size(ilut_list_opt, 2) == n_opt) n_states = n_opt allocate(ilut_list(0:niftot, n_states), source=ilut_list_opt(0:niftot, 1:n_opt)) else n_states = int(TotWalkers) allocate(ilut_list(0:niftot, TotWalkers), source=CurrentDets(0:niftot, 1:TotWalkers)) end if ! Extract details of determinant call extract_bit_rep(ilut, nI, sgn, flg) ssq = 0.0_dp do j = 1, nel if (is_beta(nI(j)) & .and. IsNotOcc(ilut, get_alpha(nI(j)))) then splus = ilut(0:NIfD) orb_tmp = get_alpha(nI(j)) clr_orb(splus, nI(j)) set_orb(splus, orb_tmp) do k = 1, nel orb2 = nI(k) if (k == j) orb2 = get_alpha(orb2) orb_tmp = get_beta(orb2) if (is_alpha(orb2) & .and. IsNotOcc(splus, orb_tmp)) then sminus = splus clr_orb(sminus, orb2) set_orb(sminus, orb_tmp) ! Adjust for the sign of the paired det in HPHF. sgn_hphf = 1.0 if (tHPHF) then if (IsAllowedHPHF(sminus, ilut_sym)) then ilut_srch = sminus else ilut_srch = ilut_sym sgn_hphf = hphf_sign(ilut_srch) end if else ilut_srch = sminus end if ! --> sminus is an allowed result of applying S-S+ pos = binary_search_ilut(ilut_list(:, 1:n_states), & ilut_srch, NIfD + 1) if (pos > 0) then ! If we are looking for the spin of the initiator ! only wavefunction, we need to ensure that we ! are projecting onto an initiator... inc = .true. if (tTruncInitiator .and. only_init) then inc = (any_run_is_initiator(ilut_list(:, pos))) end if call extract_sign(ilut_list(:, pos), sgn2) ssq = ssq + sgn(1) * sgn2(1) * sgn_hphf end if end if end do end if end do end function subroutine init_hist_excit_tofrom() integer :: ierr character(*), parameter :: this_routine = 'init_hist_excit_tofrom' ! Initialise storage for these excitaitons allocate(hist_excit_tofrom(0:nel, 0:nel), stat=ierr) block use constants, only: int64 use util_mod, only: operator(.div.) if (ierr /= 0) then call stop_all(this_routine, 'Error in allocation of hist_excit_tofrom.') end if call LogMemAlloc("hist_excit_tofrom", size(hist_excit_tofrom, kind=int64), int(sizeof(hist_excit_tofrom), kind=int64) .div.& & size(hist_excit_tofrom, kind=int64), this_routine, tag_hist_excit, ierr) end block ! Zero everything hist_excit_tofrom = 0 if (iProcIndex == root) then ! Open an output file excit_tofrom_unit = get_free_unit() open (excit_tofrom_unit, file='spawns_tofrom_excit', & status='replace') ! Write a header in the output file write (excit_tofrom_unit, '("# Number of particles spawned between & &excitation levels from the Hartree--& &Fock on this update cycle")') write (excit_tofrom_unit, '("# iter, 0-->0, 0-->1, 0-->2, ..., & &1-->0, ...")') end if end subroutine subroutine clean_hist_excit_tofrom() character(*), parameter :: this_routine = 'clean_hist_excit_tofrom' ! Clean up the stored data. deallocate(hist_excit_tofrom) log_dealloc(tag_hist_excit) ! Close the output file if (iProcIndex == root) & close(excit_tofrom_unit) end subroutine subroutine add_hist_excit_tofrom(iluti, ilutj, child) integer(n_int), intent(in) :: ilutI(0:NIfTot), ilutJ(0:NIfTot) real(dp), intent(in) :: child(lenof_sign) real(dp) :: abschild integer :: exlevelI, exlevelJ character(*), parameter :: this_routine = "add_hist_excit_tofrom" ! We want a total count of the particle weight formed. abschild = sum(abs(child)) ! Get the excitation levels of the source and target ASSERT(.not. tGUGA) exlevelI = FindBitExcitLevel(ilutRef(:, 1), ilutI, t_hphf_ic=.true.) exlevelJ = FindBitExcitLevel(ilutRef(:, 1), ilutJ, t_hphf_ic=.true.) ! And store it! hist_excit_tofrom(exlevelI, exlevelJ) = & hist_excit_tofrom(exlevelI, exlevelJ) + abschild end subroutine subroutine write_zero_hist_excit_tofrom() real(dp) :: all_hist(0:nel, 0:nel) integer :: i, j ! Gather the accumulator data to this process, and then zero the data ! for the next update cycle. call MPISum(hist_excit_tofrom, all_hist) hist_excit_tofrom = 0 if (iProcIndex == root) then ! Output the current iteration write(excit_tofrom_unit, '(i12)', advance='no') iter ! And output the accumulated data do i = 0, nel do j = 0, nel write(excit_tofrom_unit, '(f16.5)', advance='no') & all_hist(i, j) end do end do write(excit_tofrom_unit, *) end if end subroutine end module