SUBROUTINE InitFCIMCCalcPar()
INTEGER :: ierr, iunithead
logical :: formpops, binpops, tStartedFromCoreGround
integer(int64) :: MemoryAlloc
INTEGER :: error, PopsVersion
character(*), parameter :: t_r = 'InitFCIMCPar', this_routine = t_r
integer :: PopBlockingIter
real(dp) :: ExpectedMemWalk, read_tau, read_psingles, read_pparallel, read_ptriples
integer(int64) :: read_walkers_on_nodes(0:nProcessors - 1)
integer :: read_nnodes
!Variables from popsfile header...
logical :: tPop64Bit, tPopHPHF, tPopLz
integer :: iPopLenof_sign, iPopNel, iPopIter, PopNIfD, PopNIfSgn, PopNIfFlag, PopNIfTot, Popinum_runs
integer :: PopRandomHash(2056), PopBalanceBlocks
integer(int64) :: iPopAllTotWalkers
integer :: i, run
real(dp) :: PopDiagSft(1:inum_runs)
real(dp), dimension(lenof_sign) :: PopSumNoatHF
HElement_t(dp) :: PopAllSumENum(1:inum_runs)
integer :: perturb_ncreate, perturb_nannihilate
integer :: nrdms_standard, nrdms_transition
character(255) :: identifier
! set default pops version, this should not have any functional impact,
! but prevents using it uninitialized
!default
Popinum_runs = 1
! default version for popsfiles, this does not have any functional effect,
! but prevents it from using uninitialized
PopsVersion = 4
if (tPopsAlias) then
identifier = aliasStem
else
identifier = 'POPSFILE'
end if
if (tReadPops .and. .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then
call open_pops_head(iunithead, formpops, binpops, identifier)
PopsVersion = FindPopsfileVersion(iunithead)
if (iProcIndex == root) close(iunithead)
write(stdout, *) "POPSFILE VERSION ", PopsVersion, " detected."
end if
if (tReadPops .and. (PopsVersion < 3) .and. &
.not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then
!Read in particles from multiple POPSFILES for each processor
!Ugh - need to set up ValidSpawnedList here too...
call SetupValidSpawned(int(InitWalkers, int64))
write(stdout, *) "Reading in initial particle configuration from *OLD* POPSFILES..."
CALL ReadFromPopsFilePar()
ELSE
!Scale walker number
!This is needed to be done here rather than later,
!because the arrays should be allocated with appropariate sizes
if (tReadPops .and. .not. tPopsAlreadyRead) then
InitWalkers = InitWalkers * ScaleWalkers
end if
!initialise the particle positions - start at HF with positive sign
!Set the maximum number of walkers allowed
if (tReadPops .and. .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then
!Read header.
call open_pops_head(iunithead, formpops, binpops, identifier)
if (PopsVersion == 3) then
call ReadPopsHeadv3(iunithead, tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, &
iPopAllTotWalkers, PopDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, &
PopNIfD, PopNIfSgn, PopNIfFlag, PopNIfTot)
! The following values were not read in...
read_tau = 0.0_dp
read_nnodes = 0
PopBalanceBlocks = -1
else if (PopsVersion == 4) then
! The only difference between 3 & 4 is just that 4 reads
! in via a namelist, so that we can add more details
! whenever we want.
call ReadPopsHeadv4(iunithead, tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, &
iPopAllTotWalkers, PopDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, &
PopNIfD, PopNIfSgn, Popinum_runs, PopNIfFlag, PopNIfTot, &
read_tau, PopBlockingIter, PopRandomHash, read_psingles, &
read_pparallel, read_ptriples, read_nnodes, read_walkers_on_nodes, &
PopBalanceBlocks)
else
call stop_all(this_routine, "Popsfile version invalid")
end if
! Check the number of electrons created and annihilated by the
! perturbation operators, if any are being used.
if (allocated(pops_pert)) then
perturb_ncreate = pops_pert(1)%ncreate
perturb_nannihilate = pops_pert(1)%nannihilate
else
perturb_ncreate = 0
perturb_nannihilate = 0
end if
call CheckPopsParams(tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, &
iPopAllTotWalkers, PopDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, &
PopNIfD, PopNIfSgn, PopNIfTot, &
MaxWalkersUncorrected, read_tau, PopBlockingIter, read_psingles, read_pparallel, &
read_ptriples, perturb_ncreate, perturb_nannihilate)
if (iProcIndex == root) close(iunithead)
else
MaxWalkersUncorrected = int(InitWalkers)
end if
MaxWalkersPart = NINT(MemoryFacPart * MaxWalkersUncorrected)
ExpectedMemWalk = real( (NIfTot + 1) * size_n_int + 8, dp ) * &
real(MaxWalkersPart, dp) / 2._dp**20
if (ExpectedMemWalk < 20.0) then
!Increase memory allowance for small runs to a min of 20mb
MaxWalkersPart = ceiling( 20._dp * 2._dp**20 / &
real((NIfTot + 1) * size_n_int + 8, dp) )
block
character(*), parameter :: mem_warning = "Low memory requested for walkers, so increasing memory to 20Mb to avoid memory errors"
if (iProcIndex == root) write(stderr, "(A)") mem_warning
end block
end if
write(stdout, "(A,I14)") "Memory allocated for a maximum particle number per node of: ", MaxWalkersPart
!Here is where MaxSpawned is set up - do we want to set up a minimum allocation here too?
Call SetupValidSpawned(int(InitWalkers, int64))
!Put a barrier here so all processes synchronise
CALL MPIBarrier(error)
!Allocate memory to hold walkers
allocate(WalkVecDets(0:NIfTot, MaxWalkersPart), stat=ierr)
CALL LogMemAlloc('WalkVecDets', MaxWalkersPart * (NIfTot + 1), size_n_int, this_routine, WalkVecDetsTag, ierr)
WalkVecDets(0:NIfTot, 1:MaxWalkersPart) = 0
MemoryAlloc = (NIfTot + 1) * MaxWalkersPart * size_n_int !Memory Allocated in bytes
if (alloc_popsfile_dets) allocate(popsfile_dets(0:NIfTot, MaxWalkersPart), stat=ierr)
nrdms_standard = 0
nrdms_transition = 0
if (tRDMOnFly) then
if (tPairedReplicas) then
nrdms_standard = lenof_sign.div.2
else
nrdms_standard = lenof_sign
end if
if (tTransitionRDMs) then
! nrdms_transition_input will have been read in from the user
! input. But if we have two different replicas for each state
! sampled, then there are two ways to form each transition RDMs.
nrdms_transition = nrdms_transition_input * nreplicas
end if
end if
! Allocate storage for persistent data to be stored alongside
! the current determinant list (particularly diagonal matrix
! elements, i.e. CurrentH; now global_determinant_data).
call init_global_det_data(nrdms_standard, nrdms_transition)
! If we are doing cont time, then initialise it here
call init_cont_time()
! set the dummies for trial wavefunction connected space
! load balancing before trial wf initialization
if (tTrialWavefunction) then
allocate(con_send_buf(0, 0))
con_space_size = 0
end if
write(stdout, "(A,I12,A)") "Spawning vectors allowing for a total of ", MaxSpawned, &
" particles to be spawned in any one iteration per core."
write(stdout, *) "Memory requirement ", IlutBits%len_bcast * 8.0_dp * ( &
MaxSpawned / 1048576.0_dp), "MB"
allocate(SpawnVec(0:IlutBits%len_bcast, MaxSpawned), &
stat=ierr, source=0_n_int)
block
use constants, only: int64
use util_mod, only: operator(.div.)
if (ierr /= 0) then
call stop_all(this_routine, 'Error in allocation of SpawnVec.')
end if
call LogMemAlloc("SpawnVec", size(SpawnVec, kind=int64), int(sizeof(SpawnVec), kind=int64) .div. size(SpawnVec, kind=int64),&
& this_routine, SpawnVecTag, ierr)
end block
allocate(SpawnVec2(0:IlutBits%len_bcast, MaxSpawned), &
stat=ierr, source=0_n_int)
block
use constants, only: int64
use util_mod, only: operator(.div.)
if (ierr /= 0) then
call stop_all(this_routine, 'Error in allocation of SpawnVec2.')
end if
call LogMemAlloc("SpawnVec2", size(SpawnVec2, kind=int64), int(sizeof(SpawnVec2), kind=int64) .div. size(SpawnVec2, kind=int64),&
& this_routine, SpawnVec2Tag, ierr)
end block
if (use_spawn_hash_table) then
nhashes_spawn = ceiling(0.8 * MaxSpawned)
allocate(spawn_ht(nhashes_spawn), stat=ierr)
call init_hash_table(spawn_ht)
end if
!Point at correct spawning arrays
SpawnedParts => SpawnVec
SpawnedParts2 => SpawnVec2
MemoryAlloc = MemoryAlloc + (NIfTot + 1) * MaxSpawned * 2 * size_n_int
if (tAutoAdaptiveShift) then
allocate(SpawnInfoVec(1:SpawnInfoWidth, MaxSpawned), stat=ierr)
block
use constants, only: int64
use util_mod, only: operator(.div.)
if (ierr /= 0) then
call stop_all(this_routine, 'Error in allocation of SpawnInfoVec.')
end if
call LogMemAlloc("SpawnInfoVec", size(SpawnInfoVec, kind=int64), int(sizeof(SpawnInfoVec), kind=int64) .div. size(SpawnInfoVec,&
& kind=int64), this_routine, SpawnInfoVecTag, ierr)
end block
allocate(SpawnInfoVec2(1:SpawnInfoWidth, MaxSpawned), stat=ierr)
block
use constants, only: int64
use util_mod, only: operator(.div.)
if (ierr /= 0) then
call stop_all(this_routine, 'Error in allocation of SpawnInfoVec2.')
end if
call LogMemAlloc("SpawnInfoVec2", size(SpawnInfoVec2, kind=int64), int(sizeof(SpawnInfoVec2), kind=int64) .div.&
& size(SpawnInfoVec2, kind=int64), this_routine, SpawnInfoVec2Tag, ierr)
end block
SpawnInfoVec(:, :) = 0
SpawnInfoVec2(:, :) = 0
SpawnInfo => SpawnInfoVec
SpawnInfo2 => SpawnInfoVec2
MemoryAlloc = MemoryAlloc + (SpawnInfoWidth) * MaxSpawned * 2 * size_n_int
end if
write(stdout, "(A)") "Storing walkers in hash-table. Algorithm is now formally linear scaling with walker number"
write(stdout, "(A,I15)") "Length of hash-table: ", nWalkerHashes
write(stdout, "(A,F20.5)") "Length of hash-table as a fraction of targetwalkers: ", HashLengthFrac
! TODO: Correct the memory usage.
!MemTemp=2*(8*(nClashMax+1)*nWalkerHashes)+8*MaxWalkersPart
!write(stdout,"(A,F10.3,A)") "This will use ",real(MemTemp,dp)/1048576.0_dp,&
! " Mb of memory per process, although this is likely to increase as it expands"
allocate(HashIndex(nWalkerHashes), stat=ierr)
if (ierr /= 0) call stop_all(this_routine, "Error in allocation")
do i = 1, nWalkerHashes
HashIndex(i)%ind = 0
end do
!Also need to allocate memory for the freeslot array
allocate(FreeSlot(MaxWalkersPart), stat=ierr)
if (ierr /= 0) call stop_all(this_routine, "Error in allocation")
freeslot(:) = 0
!MemoryAlloc=MemoryAlloc+MemTemp
! Allocate pointers to the correct walker arrays
CurrentDets => WalkVecDets
! Get the (0-based) processor index for the HF det.
do run = 1, inum_runs
iRefProc(run) = DetermineDetNode(nel, ProjEDet(:, run), 0)
end do
write(stdout, "(A,I8)") "Reference processor is: ", iRefProc(1)
write(stdout, "(A)", advance='no') "Initial reference is: "
call write_det(stdout, ProjEDet(:, 1), .true.)
TotParts(:) = 0.0
TotPartsOld(:) = 0.0
NoatHF(:) = 0.0_dp
InstNoatHF(:) = 0.0_dp
! Has been moved to guarantee initialization before first load balancing
! Initialises RDM stuff for both explicit and stochastic calculations of RDM.
tFillingStochRDMonFly = .false.
tFillingExplicRDMonFly = .false.
!One of these becomes true when we have reached the relevant iteration to begin filling the RDM.
! initialize excitation generator
if (t_guga_pchb) call init_guga_pchb_excitgen()
! If we have a popsfile, read the walkers in now.
if (tReadPops .and. .not. tPopsAlreadyRead) then
call InitFCIMC_pops(iPopAllTotWalkers, PopNIfSgn, iPopNel, read_nnodes, &
read_walkers_on_nodes, pops_pert, &
PopBalanceBLocks, PopDiagSft)
else
if (tStartMP1) then
!Initialise walkers according to mp1 amplitude.
call InitFCIMC_MP1()
else if (tStartCAS) then
!Initialise walkers according to a CAS diagonalisation.
call InitFCIMC_CAS()
else if (tTrialInit .or. (tOrthogonaliseReplicas .and. &
.not. tReplicaSingleDetStart)) then
call InitFCIMC_trial()
else if (tInitializeCSF) then
call InitFCIMC_CSF()
else !Set up walkers on HF det
if (tStartSinglePart) then
write(stdout, "(A,I10,A,F9.3,A,I15)") "Initial number of particles set to ", int(InitialPart), &
" and shift will be held at ", DiagSft(1), " until particle number gets to ", int(InitWalkers * nNodes)
else
write(stdout, "(A,I16)") "Initial number of walkers per processor chosen to be: ", nint(InitWalkers)
end if
call InitFCIMC_HF()
end if !tStartmp1
end if
if (t_ueg_3_body .and. tTrcorrExgen) tLatticeGens = .false.
write(stdout, "(A,F14.6,A)") " Initial memory (without excitgens + temp arrays) consists of : ", &
& REAL(MemoryAlloc, dp) / 1048576.0_dp, " Mb/Processor"
write(stdout, *) "Only one array of memory to store main particle list allocated..."
write(stdout, *) "Initial memory allocation sucessful..."
write(stdout, *) "============================================="
CALL neci_flush(stdout)
end if !End if initial walkers method
! There was no last output, use the same value as for the shift update
AllTotPartsLastOutput = AllTotPartsOld
!Put a barrier here so all processes synchronise
CALL MPIBarrier(error)
call init_norm()
IF (tPrintOrbOcc) THEN
allocate(OrbOccs(nBasis), stat=ierr)
CALL LogMemAlloc('OrbOccs', nBasis, 8, this_routine, OrbOccsTag, ierr)
OrbOccs(:) = 0.0_dp
end if
IF (tHistInitPops) THEN
CALL InitHistInitPops()
end if
tPrintHighPop = .false.
MaxInitPopPos = 0.0
MaxInitPopNeg = 0.0
IF (abs(MaxNoatHF) < 1.0e-12_dp) THEN
MaxNoatHF = InitWalkers * nNodes
HFPopThresh = int(MaxNoatHF, int64)
end if
! Initialise excitation generation storage
call init_excit_gen_store(fcimc_excit_gen_store)
if (t_pcpp_excitgen) call init_pcpp_excitgen()
if (t_impurity_excitgen) call setupImpurityExcitgen()
! [W.D.] I guess I want to initialize that before the tau-search,
! or otherwise some pgens get calculated incorrectly
if (t_back_spawn .or. t_back_spawn_flex) then
call init_back_spawn()
end if
! also i should warn the user if this is a restarted run with a
! set delay in the back-spawning method:
! is there actually a use-case where someone really wants to delay
! a back-spawn in a restarted run?
if (tReadPops .and. back_spawn_delay /= 0) then
call Warning_neci(t_r, &
"Do you really want a delayed back-spawn in a restarted run?")
end if
call init_tau()
IF ((NMCyc /= 0) .and. (tRotateOrbs .and. (.not. tFindCINatOrbs))) then
CALL Stop_All(this_routine, "Currently not set up to rotate and then go straight into a spawning &
& calculation. Ordering of orbitals is incorrect. This may be fixed if needed.")
end if
if (tRDMonFly) then
call init_rdms(nrdms_standard, nrdms_transition)
!If the iteration specified to start filling the RDM has already been, want to
!start filling as soon as possible.
do run = 1, inum_runs
if (.not. tSinglePartPhase(run)) VaryShiftIter(run) = 0
end do
end if
! Perform all semi-stochastic initialisation. This includes generating all the states in the
! deterministic space, finding their processors, ordering them, inserting them into
! CurrentDets, calculating and storing all Hamiltonian matrix elements and initalising all
! arrays required to store and distribute the vectors in the deterministic space later.
! in the real-time application, this is done after the initial state is set up
if (tSemiStochastic .and. .not. t_real_time_fciqmc) then
if (tDynamicCoreSpace .and. tRDMonFly) then
tSemiStochastic = .false.
semistoch_shift_iter = 1
else
call init_semi_stochastic(ss_space_in, tStartedFromCoreGround)
if (tStartedFromCoreGround .and. tSetInitialRunRef) call set_initial_run_references()
end if
end if
! If the number of trial states to calculate hasn't been set by the
! user, then simply use the minimum number
if ((tTrialWavefunction .or. tStartTrialLater) .and. (ntrial_ex_calc == 0)) then
ntrial_ex_calc = inum_runs
end if
! Initialise the trial wavefunction information which can be used for the energy estimator.
! This includes generating the trial space, generating the space connected to the trial space,
! diagonalising the trial space to find the trial wavefunction and calculating the vector
! in the connected space, required for the energy estimator.
if (tRDMonFly .and. tDynamicCoreSpace .and. tTrialWavefunction) then
tTrialWavefunction = .false.
tStartTrialLater = .true.
trial_shift_iter = 1
end if
if (tTrialWavefunction) then
if (tPairedReplicas) then
call init_trial_wf(trial_space_in, ntrial_ex_calc, inum_runs.div.2, .true.)
else
call init_trial_wf(trial_space_in, ntrial_ex_calc, inum_runs, .false.)
end if
else if (tStartTrialLater) then
! If we are going to turn on the use of a trial wave function
! later in the calculation, then zero the trial estimate arrays
! for now, to prevent junk being printed before then.
trial_numerator = 0.0_dp
tot_trial_numerator = 0.0_dp
trial_denom = 0.0_dp
tot_trial_denom = 0.0_dp
init_trial_numerator = 0.0_dp
tot_init_trial_numerator = 0.0_dp
init_trial_denom = 0.0_dp
tot_init_trial_denom = 0.0_dp
trial_num_inst = 0.0_dp
tot_trial_num_inst = 0.0_dp
trial_denom_inst = 0.0_dp
tot_trial_denom_inst = 0.0_dp
end if
replica_overlaps_real(:, :) = 0.0_dp
#ifdef CMPLX_
replica_overlaps_imag(:, :) = 0.0_dp
#endif
if (t_cepa_shift) call init_cepa_shifts()
! Set up the reference space for the adi-approach
! in real-time, we do this in the real-time init
if (.not. t_real_time_fciqmc) call setup_reference_space(tReadPops)
! in fixed-n0, the variable shift mode and everything connected is
! controlled over the reference population
if (tFixedN0) then
if (tReadPops .and. .not. tWalkContGrow) then
tSkipRef = .true.
tSinglePartPhase = .false.
else
tSkipRef = .false.
tSinglePartPhase = .true.
end if
end if
if (tRDMonFly .and. tDynamicCoreSpace) call sync_rdm_sampling_iter()
! for the (uniform) 3-body excitgen, the generation probabilities are uniquely given
! by the number of alpha and beta electrons and the number of orbitals
! and can hence be precomputed
if (t_mol_3_body .or. t_ueg_3_body) call setup_mol_tc_excitgen()
if (tInitiatorSpace) call init_initiator_space(i_space_in)
if (tReplicaEstimates) then
if (.not. tPairedReplicas) then
call stop_all(this_routine, "The paired-replicas option must be used the logging &
&block, in order to calculate replica estimates.)")
end if
if (tSemiStochastic) allocate (tDetermSpawnedTo( &
cs_replicas(core_run)%determ_sizes(iProcIndex)))
call open_replica_est_file()
end if
! When should we perform death before communication?
! For integer walkers, do death before comms just so the tests don't fail (or need updating).
if (.not. tAllRealCoeff) then
tDeathBeforeComms = .true.
end if
if (t_back_spawn .or. t_back_spawn_flex) then
tDeathBeforeComms = .true.
end if
! For FCIQMC with preconditioning and a time step of 1, death will
! kill all walkers and remove them from the hash table. In this
! case, we must set the initiator flags for spawning to occupied
! determinants before this occurs.
if (tPreCond .and. (tau.isclose.1.0_dp)) tSetInitFlagsBeforeDeath = .true.
! Make sure we are performing death *after* communication, in cases
! where this is essential.
if (tPreCond .and. tDeathBeforeComms) then
call stop_all(this_routine, "With preconditioning, death must &
&be performed after communication.")
end if
if (tReplicaEstimates .and. tDeathBeforeComms) then
call stop_all(this_routine, "In order to calculate replica estimates, &
&death must be performed after communication.")
end if
if (tEN2Init .and. (.not. tTruncInitiator)) then
call stop_all(this_routine, "Cannot calculate the EN2 correction to initiator &
&error as the initiator method is not in use.")
end if
end subroutine InitFCIMCCalcPar