SUBROUTINE InitFCIMCCalcPar() INTEGER :: ierr, iunithead logical :: formpops, binpops, tStartedFromCoreGround integer(int64) :: MemoryAlloc INTEGER :: error, PopsVersion character(*), parameter :: t_r = 'InitFCIMCPar', this_routine = t_r integer :: PopBlockingIter real(dp) :: ExpectedMemWalk, read_tau, read_psingles, read_pparallel, read_ptriples integer(int64) :: read_walkers_on_nodes(0:nProcessors - 1) integer :: read_nnodes !Variables from popsfile header... logical :: tPop64Bit, tPopHPHF, tPopLz integer :: iPopLenof_sign, iPopNel, iPopIter, PopNIfD, PopNIfSgn, PopNIfFlag, PopNIfTot, Popinum_runs integer :: PopRandomHash(2056), PopBalanceBlocks integer(int64) :: iPopAllTotWalkers integer :: i, run real(dp) :: PopDiagSft(1:inum_runs), PopModDiagSft(1:inum_runs) real(dp), dimension(lenof_sign) :: PopSumNoatHF HElement_t(dp) :: PopAllSumENum(1:inum_runs) integer :: perturb_ncreate, perturb_nannihilate integer :: nrdms_standard, nrdms_transition character(255) :: identifier ! set default pops version, this should not have any functional impact, ! but prevents using it uninitialized !default Popinum_runs = 1 ! default version for popsfiles, this does not have any functional effect, ! but prevents it from using uninitialized PopsVersion = 4 if (tPopsAlias) then identifier = aliasStem else identifier = 'POPSFILE' end if if (tReadPops .and. .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then call open_pops_head(iunithead, formpops, binpops, identifier) PopsVersion = FindPopsfileVersion(iunithead) if (iProcIndex == root) close(iunithead) write(stdout, *) "POPSFILE VERSION ", PopsVersion, " detected." end if if (tReadPops .and. (PopsVersion < 3) .and. & .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then !Read in particles from multiple POPSFILES for each processor !Ugh - need to set up ValidSpawnedList here too... call SetupValidSpawned(int(InitWalkers, int64)) write(stdout, *) "Reading in initial particle configuration from *OLD* POPSFILES..." CALL ReadFromPopsFilePar() ELSE !Scale walker number !This is needed to be done here rather than later, !because the arrays should be allocated with appropariate sizes if (tReadPops .and. .not. tPopsAlreadyRead) then InitWalkers = InitWalkers * ScaleWalkers end if !initialise the particle positions - start at HF with positive sign !Set the maximum number of walkers allowed if (tReadPops .and. .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then !Read header. call open_pops_head(iunithead, formpops, binpops, identifier) if (PopsVersion == 3) then call ReadPopsHeadv3(iunithead, tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, & iPopAllTotWalkers, PopDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, & PopNIfD, PopNIfSgn, PopNIfFlag, PopNIfTot) ! The following values were not read in... read_tau = 0.0_dp read_nnodes = 0 PopBalanceBlocks = -1 else if (PopsVersion == 4) then ! The only difference between 3 & 4 is just that 4 reads ! in via a namelist, so that we can add more details ! whenever we want. call ReadPopsHeadv4(iunithead, tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, & iPopAllTotWalkers, PopDiagSft, PopModDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, & PopNIfD, PopNIfSgn, Popinum_runs, PopNIfFlag, PopNIfTot, & read_tau, PopBlockingIter, PopRandomHash, read_psingles, & read_pparallel, read_ptriples, read_nnodes, read_walkers_on_nodes, & PopBalanceBlocks) else call stop_all(this_routine, "Popsfile version invalid") end if ! Check the number of electrons created and annihilated by the ! perturbation operators, if any are being used. if (allocated(pops_pert)) then perturb_ncreate = pops_pert(1)%ncreate perturb_nannihilate = pops_pert(1)%nannihilate else perturb_ncreate = 0 perturb_nannihilate = 0 end if call CheckPopsParams(tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, & iPopAllTotWalkers, PopDiagSft, PopModDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, & PopNIfD, PopNIfSgn, PopNIfTot, & MaxWalkersUncorrected, read_tau, PopBlockingIter, read_psingles, read_pparallel, & read_ptriples, perturb_ncreate, perturb_nannihilate) if (iProcIndex == root) close(iunithead) else MaxWalkersUncorrected = int(InitWalkers) end if MaxWalkersPart = NINT(MemoryFacPart * MaxWalkersUncorrected) ExpectedMemWalk = real( (NIfTot + 1) * size_n_int + 8, dp ) * & real(MaxWalkersPart, dp) / 2._dp**20 if (ExpectedMemWalk < 20.0) then !Increase memory allowance for small runs to a min of 20mb MaxWalkersPart = ceiling( 20._dp * 2._dp**20 / & real((NIfTot + 1) * size_n_int + 8, dp) ) block character(*), parameter :: mem_warning = "Low memory requested for walkers, so increasing memory to 20Mb to avoid memory errors" if (iProcIndex == root) write(stderr, "(A)") mem_warning end block end if write(stdout, "(A,I14)") "Memory allocated for a maximum particle number per node of: ", MaxWalkersPart !Here is where MaxSpawned is set up - do we want to set up a minimum allocation here too? Call SetupValidSpawned(int(InitWalkers, int64)) !Put a barrier here so all processes synchronise CALL MPIBarrier(error) !Allocate memory to hold walkers allocate(WalkVecDets(0:NIfTot, MaxWalkersPart), stat=ierr) CALL LogMemAlloc('WalkVecDets', MaxWalkersPart * (NIfTot + 1), size_n_int, this_routine, WalkVecDetsTag, ierr) WalkVecDets(0:NIfTot, 1:MaxWalkersPart) = 0 MemoryAlloc = (NIfTot + 1) * MaxWalkersPart * size_n_int !Memory Allocated in bytes if (alloc_popsfile_dets) allocate(popsfile_dets(0:NIfTot, MaxWalkersPart), stat=ierr) nrdms_standard = 0 nrdms_transition = 0 if (tRDMOnFly) then if (tPairedReplicas) then nrdms_standard = lenof_sign.div.2 else nrdms_standard = lenof_sign end if if (tTransitionRDMs) then ! nrdms_transition_input will have been read in from the user ! input. But if we have two different replicas for each state ! sampled, then there are two ways to form each transition RDMs. nrdms_transition = nrdms_transition_input * nreplicas end if end if ! Allocate storage for persistent data to be stored alongside ! the current determinant list (particularly diagonal matrix ! elements, i.e. CurrentH; now global_determinant_data). call init_global_det_data(nrdms_standard, nrdms_transition) ! If we are doing cont time, then initialise it here call init_cont_time() ! set the dummies for trial wavefunction connected space ! load balancing before trial wf initialization if (tTrialWavefunction) then allocate(con_send_buf(0, 0)) con_space_size = 0 end if write(stdout, "(A,I12,A)") "Spawning vectors allowing for a total of ", MaxSpawned, & " particles to be spawned in any one iteration per core." write(stdout, *) "Memory requirement ", IlutBits%len_bcast * 8.0_dp * ( & MaxSpawned / 1048576.0_dp), "MB" allocate(SpawnVec(0 : IlutBits%len_bcast, MaxSpawned), & stat=ierr, source=0_n_int) block use constants, only: int64 use util_mod, only: operator(.div.) if (ierr /= 0) then call stop_all(this_routine, 'Error in allocation of SpawnVec.') end if call LogMemAlloc("SpawnVec", size(SpawnVec, kind=int64), int(sizeof(SpawnVec), kind=int64) .div. size(SpawnVec, kind=int64),& & this_routine, SpawnVecTag, ierr) end block allocate(SpawnVec2(0 : IlutBits%len_bcast, MaxSpawned), & stat=ierr, source=0_n_int) block use constants, only: int64 use util_mod, only: operator(.div.) if (ierr /= 0) then call stop_all(this_routine, 'Error in allocation of SpawnVec2.') end if call LogMemAlloc("SpawnVec2", size(SpawnVec2, kind=int64), int(sizeof(SpawnVec2), kind=int64) .div. size(SpawnVec2, kind=int64),& & this_routine, SpawnVec2Tag, ierr) end block if (use_spawn_hash_table) then nhashes_spawn = ceiling(0.8 * MaxSpawned) allocate(spawn_ht(nhashes_spawn), stat=ierr) call init_hash_table(spawn_ht) end if !Point at correct spawning arrays SpawnedParts => SpawnVec SpawnedParts2 => SpawnVec2 MemoryAlloc = MemoryAlloc + (NIfTot + 1) * MaxSpawned * 2 * size_n_int if (tAutoAdaptiveShift) then allocate(SpawnInfoVec(1:SpawnInfoWidth, MaxSpawned), stat=ierr) block use constants, only: int64 use util_mod, only: operator(.div.) if (ierr /= 0) then call stop_all(this_routine, 'Error in allocation of SpawnInfoVec.') end if call LogMemAlloc("SpawnInfoVec", size(SpawnInfoVec, kind=int64), int(sizeof(SpawnInfoVec), kind=int64) .div. size(SpawnInfoVec,& & kind=int64), this_routine, SpawnInfoVecTag, ierr) end block allocate(SpawnInfoVec2(1:SpawnInfoWidth, MaxSpawned), stat=ierr) block use constants, only: int64 use util_mod, only: operator(.div.) if (ierr /= 0) then call stop_all(this_routine, 'Error in allocation of SpawnInfoVec2.') end if call LogMemAlloc("SpawnInfoVec2", size(SpawnInfoVec2, kind=int64), int(sizeof(SpawnInfoVec2), kind=int64) .div.& & size(SpawnInfoVec2, kind=int64), this_routine, SpawnInfoVec2Tag, ierr) end block SpawnInfoVec(:, :) = 0 SpawnInfoVec2(:, :) = 0 SpawnInfo => SpawnInfoVec SpawnInfo2 => SpawnInfoVec2 MemoryAlloc = MemoryAlloc + (SpawnInfoWidth) * MaxSpawned * 2 * size_n_int end if write(stdout, "(A)") "Storing walkers in hash-table. Algorithm is now formally linear scaling with walker number" write(stdout, "(A,I15)") "Length of hash-table: ", nWalkerHashes write(stdout, "(A,F20.5)") "Length of hash-table as a fraction of targetwalkers: ", HashLengthFrac ! TODO: Correct the memory usage. !MemTemp=2*(8*(nClashMax+1)*nWalkerHashes)+8*MaxWalkersPart !write(stdout,"(A,F10.3,A)") "This will use ",real(MemTemp,dp)/1048576.0_dp,& ! " Mb of memory per process, although this is likely to increase as it expands" allocate(HashIndex(nWalkerHashes), stat=ierr) if (ierr /= 0) call stop_all(this_routine, "Error in allocation") do i = 1, nWalkerHashes HashIndex(i)%ind = 0 end do !Also need to allocate memory for the freeslot array allocate(FreeSlot(MaxWalkersPart), stat=ierr) if (ierr /= 0) call stop_all(this_routine, "Error in allocation") freeslot(:) = 0 !MemoryAlloc=MemoryAlloc+MemTemp ! Allocate pointers to the correct walker arrays CurrentDets => WalkVecDets ! Get the (0-based) processor index for the HF det. do run = 1, inum_runs iRefProc(run) = DetermineDetNode(nel, ProjEDet(:, run), 0) end do write(stdout, "(A,I8)") "Reference processor is: ", iRefProc(1) write(stdout, "(A)", advance='no') "Initial reference is: " call write_det(stdout, ProjEDet(:, 1), .true.) if (tEstHilbSpaceMC) call FindHilbSizeTruncHubb() TotParts(:) = 0.0 TotPartsOld(:) = 0.0 NoatHF(:) = 0.0_dp InstNoatHF(:) = 0.0_dp ! Has been moved to guarantee initialization before first load balancing ! Initialises RDM stuff for both explicit and stochastic calculations of RDM. tFillingStochRDMonFly = .false. tFillingExplicRDMonFly = .false. !One of these becomes true when we have reached the relevant iteration to begin filling the RDM. ! If we have a popsfile, read the walkers in now. if (tReadPops .and. .not. tPopsAlreadyRead) then call InitFCIMC_pops(iPopAllTotWalkers, PopNIfSgn, iPopNel, read_nnodes, & read_walkers_on_nodes, pops_pert, & PopBalanceBLocks, PopDiagSft) else if (tStartMP1) then !Initialise walkers according to mp1 amplitude. call InitFCIMC_MP1() else if (tStartCAS) then !Initialise walkers according to a CAS diagonalisation. call InitFCIMC_CAS() else if (tTrialInit .or. (tOrthogonaliseReplicas .and. & .not. tReplicaSingleDetStart)) then call InitFCIMC_trial() else if (tInitializeCSF) then call InitFCIMC_CSF() else !Set up walkers on HF det if (tStartSinglePart) then write(stdout, "(A,I10,A,F9.3,A,I15)") "Initial number of particles set to ", int(InitialPart), & " and shift will be held at ", DiagSft(1), " until particle number gets to ", int(InitWalkers * nNodes) else write(stdout, "(A,I16)") "Initial number of walkers per processor chosen to be: ", nint(InitWalkers) end if call InitFCIMC_HF() end if !tStartmp1 end if if (t_ueg_3_body .and. tTrcorrExgen) tLatticeGens = .false. write(stdout, "(A,F14.6,A)") " Initial memory (without excitgens + temp arrays) consists of : ", & REAL(MemoryAlloc, dp) / 1048576.0_dp, " Mb/Processor" write(stdout, *) "Only one array of memory to store main particle list allocated..." write(stdout, *) "Initial memory allocation sucessful..." write(stdout, *) "=============================================" CALL neci_flush(stdout) end if !End if initial walkers method ! There was no last output, use the same value as for the shift update AllTotPartsLastOutput = AllTotPartsOld !Put a barrier here so all processes synchronise CALL MPIBarrier(error) call init_norm() IF (tPrintOrbOcc) THEN allocate(OrbOccs(nBasis), stat=ierr) CALL LogMemAlloc('OrbOccs', nBasis, 8, this_routine, OrbOccsTag, ierr) OrbOccs(:) = 0.0_dp end if IF (tHistInitPops) THEN CALL InitHistInitPops() end if tPrintHighPop = .false. MaxInitPopPos = 0.0 MaxInitPopNeg = 0.0 IF (abs(MaxNoatHF) < 1.0e-12_dp) THEN MaxNoatHF = InitWalkers * nNodes HFPopThresh = int(MaxNoatHF, int64) end if ! Initialise excitation generation storage call init_excit_gen_store(fcimc_excit_gen_store) if (t_pcpp_excitgen) call init_pcpp_excitgen() if (t_impurity_excitgen) call setupImpurityExcitgen() ! [W.D.] I guess I want to initialize that before the tau-search, ! or otherwise some pgens get calculated incorrectly if (t_back_spawn .or. t_back_spawn_flex) then call init_back_spawn() end if ! also i should warn the user if this is a restarted run with a ! set delay in the back-spawning method: ! is there actually a use-case where someone really wants to delay ! a back-spawn in a restarted run? if (tReadPops .and. back_spawn_delay /= 0) then call Warning_neci(t_r, & "Do you really want a delayed back-spawn in a restarted run?") end if call init_tau() IF ((NMCyc /= 0) .and. (tRotateOrbs .and. (.not. tFindCINatOrbs))) then CALL Stop_All(this_routine, "Currently not set up to rotate and then go straight into a spawning " & // " calculation. Ordering of orbitals is incorrect. This may be fixed if needed.") end if if (tRDMonFly) then call init_rdms(nrdms_standard, nrdms_transition) !If the iteration specified to start filling the RDM has already been, want to !start filling as soon as possible. do run = 1, inum_runs if (.not. tSinglePartPhase(run)) VaryShiftIter(run) = 0 end do end if ! Perform all semi-stochastic initialisation. This includes generating all the states in the ! deterministic space, finding their processors, ordering them, inserting them into ! CurrentDets, calculating and storing all Hamiltonian matrix elements and initalising all ! arrays required to store and distribute the vectors in the deterministic space later. ! in the real-time application, this is done after the initial state is set up if (tSemiStochastic .and. .not. t_real_time_fciqmc) then if (tDynamicCoreSpace .and. tRDMonFly) then tSemiStochastic = .false. semistoch_shift_iter = 1 else call init_semi_stochastic(ss_space_in, tStartedFromCoreGround) if (tStartedFromCoreGround .and. tSetInitialRunRef) call set_initial_run_references() end if end if ! If the number of trial states to calculate hasn't been set by the ! user, then simply use the minimum number if ((tTrialWavefunction .or. tStartTrialLater) .and. (ntrial_ex_calc == 0)) then ntrial_ex_calc = inum_runs end if ! Initialise the trial wavefunction information which can be used for the energy estimator. ! This includes generating the trial space, generating the space connected to the trial space, ! diagonalising the trial space to find the trial wavefunction and calculating the vector ! in the connected space, required for the energy estimator. if (tRDMonFly .and. tDynamicCoreSpace .and. tTrialWavefunction) then tTrialWavefunction = .false. tStartTrialLater = .true. trial_shift_iter = 1 end if if (tTrialWavefunction) then if (tPairedReplicas) then call init_trial_wf(trial_space_in, ntrial_ex_calc, inum_runs.div.2, .true.) else call init_trial_wf(trial_space_in, ntrial_ex_calc, inum_runs, .false.) end if else if (tStartTrialLater) then ! If we are going to turn on the use of a trial wave function ! later in the calculation, then zero the trial estimate arrays ! for now, to prevent junk being printed before then. trial_numerator = 0.0_dp tot_trial_numerator = 0.0_dp trial_denom = 0.0_dp tot_trial_denom = 0.0_dp init_trial_numerator = 0.0_dp tot_init_trial_numerator = 0.0_dp init_trial_denom = 0.0_dp tot_init_trial_denom = 0.0_dp trial_num_inst = 0.0_dp tot_trial_num_inst = 0.0_dp trial_denom_inst = 0.0_dp tot_trial_denom_inst = 0.0_dp end if replica_overlaps_real(:, :) = 0.0_dp #ifdef CMPLX_ replica_overlaps_imag(:, :) = 0.0_dp #endif if (t_cepa_shift) call init_cepa_shifts() ! Set up the reference space for the adi-approach ! in real-time, we do this in the real-time init if (.not. t_real_time_fciqmc) call setup_reference_space(tReadPops) ! in fixed-n0, the variable shift mode and everything connected is ! controlled over the reference population if (tFixedN0) then if (tReadPops .and. .not. tWalkContGrow) then tSkipRef = .true. tSinglePartPhase = .false. else tSkipRef = .false. tSinglePartPhase = .true. end if end if if (tRDMonFly .and. tDynamicCoreSpace) call sync_rdm_sampling_iter() ! for the (uniform) 3-body excitgen, the generation probabilities are uniquely given ! by the number of alpha and beta electrons and the number of orbitals ! and can hence be precomputed if (t_mol_3_body .or. t_ueg_3_body) call setup_mol_tc_excitgen() if (tInitiatorSpace) call init_initiator_space(i_space_in) if (tReplicaEstimates) then if (.not. tPairedReplicas) then call stop_all(this_routine, "The paired-replicas option must be used the logging " & // "block, in order to calculate replica estimates.)") end if if (.not. (tSemiStochastic .or. (semistoch_shift_iter > 0))) then call stop_all(this_routine, & "replica-estimates need semi-stochastic option!") end if if (tSemiStochastic) then allocate (tDetermSpawnedTo( & cs_replicas(core_run)%determ_sizes(iProcIndex))) end if call open_replica_est_file() end if ! When should we perform death before communication? ! For integer walkers, do death before comms just so the tests don't fail (or need updating). if (.not. tAllRealCoeff) then tDeathBeforeComms = .true. end if if (t_back_spawn .or. t_back_spawn_flex) then tDeathBeforeComms = .true. end if ! For FCIQMC with preconditioning and a time step of 1, death will ! kill all walkers and remove them from the hash table. In this ! case, we must set the initiator flags for spawning to occupied ! determinants before this occurs. if (tPreCond .and. (tau.isclose.1.0_dp)) tSetInitFlagsBeforeDeath = .true. ! Make sure we are performing death *after* communication, in cases ! where this is essential. if (tPreCond .and. tDeathBeforeComms) then call stop_all(this_routine, "With preconditioning, death must " & // "be performed after communication.") end if if (tReplicaEstimates .and. tDeathBeforeComms) then call stop_all(this_routine, "In order to calculate replica estimates, " & // "death must be performed after communication.") end if if (tEN2Init .and. (.not. tTruncInitiator)) then call stop_all(this_routine, "Cannot calculate the EN2 correction to initiator " & // "error as the initiator method is not in use.") end if if (tBlockSpawns) then call init_block_list(& blocklist_name, block_list, blocklist_hash_table) end if if (tFullProjE) then allocate(connected_dets(0:NIfTot, max_connected_size), stat=ierr) call LogMemAlloc("connected_dets", max_connected_size * (NIfTot + 1), size_n_int, t_r, ConnectedTag, ierr) end if if (tBosonNoSpawn) then call ReadBosonicPopsfile(bosonic_popsfile) SftDamp2 = SftDamp**2 / 4._dp endif if (tInitExpandAnnihil) then tInitProbation = .false. endif end subroutine InitFCIMCCalcPar