InitFCIMCCalcPar Subroutine

public subroutine InitFCIMCCalcPar()

Arguments

None

Contents

Source Code


Source Code

    SUBROUTINE InitFCIMCCalcPar()
        INTEGER :: ierr, iunithead
        logical :: formpops, binpops, tStartedFromCoreGround
        integer(int64) :: MemoryAlloc
        INTEGER :: error, PopsVersion
        character(*), parameter :: t_r = 'InitFCIMCPar', this_routine = t_r
        integer :: PopBlockingIter
        real(dp) :: ExpectedMemWalk, read_tau, read_psingles, read_pparallel, read_ptriples
        integer(int64) :: read_walkers_on_nodes(0:nProcessors - 1)
        integer :: read_nnodes
        !Variables from popsfile header...
        logical :: tPop64Bit, tPopHPHF, tPopLz
        integer :: iPopLenof_sign, iPopNel, iPopIter, PopNIfD, PopNIfSgn, PopNIfFlag, PopNIfTot, Popinum_runs
        integer :: PopRandomHash(2056), PopBalanceBlocks
        integer(int64) :: iPopAllTotWalkers
        integer :: i, run
        real(dp) :: PopDiagSft(1:inum_runs)
        real(dp), dimension(lenof_sign) :: PopSumNoatHF
        HElement_t(dp) :: PopAllSumENum(1:inum_runs)
        integer :: perturb_ncreate, perturb_nannihilate
        integer :: nrdms_standard, nrdms_transition
        character(255) :: identifier
        ! set default pops version, this should not have any functional impact,
        ! but prevents using it uninitialized
        !default
        Popinum_runs = 1
        ! default version for popsfiles, this does not have any functional effect,
        ! but prevents it from using uninitialized
        PopsVersion = 4

        if (tPopsAlias) then
            identifier = aliasStem
        else
            identifier = 'POPSFILE'
        end if

        if (tReadPops .and. .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then
            call open_pops_head(iunithead, formpops, binpops, identifier)
            PopsVersion = FindPopsfileVersion(iunithead)
            if (iProcIndex == root) close(iunithead)
            write(stdout, *) "POPSFILE VERSION ", PopsVersion, " detected."
        end if

        if (tReadPops .and. (PopsVersion < 3) .and. &
            .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then
!Read in particles from multiple POPSFILES for each processor
            !Ugh - need to set up ValidSpawnedList here too...
            call SetupValidSpawned(int(InitWalkers, int64))
            write(stdout, *) "Reading in initial particle configuration from *OLD* POPSFILES..."
            CALL ReadFromPopsFilePar()
        ELSE
            !Scale walker number
            !This is needed to be done here rather than later,
            !because the arrays should be allocated with appropariate sizes
            if (tReadPops .and. .not. tPopsAlreadyRead) then
                InitWalkers = InitWalkers * ScaleWalkers
            end if

!initialise the particle positions - start at HF with positive sign
!Set the maximum number of walkers allowed
            if (tReadPops .and. .not. (tPopsAlreadyRead .or. tHDF5PopsRead)) then
                !Read header.
                call open_pops_head(iunithead, formpops, binpops, identifier)
                if (PopsVersion == 3) then
                    call ReadPopsHeadv3(iunithead, tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, &
                                        iPopAllTotWalkers, PopDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, &
                                        PopNIfD, PopNIfSgn, PopNIfFlag, PopNIfTot)

                    ! The following values were not read in...
                    read_tau = 0.0_dp
                    read_nnodes = 0
                    PopBalanceBlocks = -1
                else if (PopsVersion == 4) then
                    ! The only difference between 3 & 4 is just that 4 reads
                    ! in via a namelist, so that we can add more details
                    ! whenever we want.
                    call ReadPopsHeadv4(iunithead, tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, &
                                        iPopAllTotWalkers, PopDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, &
                                        PopNIfD, PopNIfSgn, Popinum_runs, PopNIfFlag, PopNIfTot, &
                                        read_tau, PopBlockingIter, PopRandomHash, read_psingles, &
                                        read_pparallel, read_ptriples, read_nnodes, read_walkers_on_nodes, &
                                        PopBalanceBlocks)
                else
                    call stop_all(this_routine, "Popsfile version invalid")
                end if

                ! Check the number of electrons created and annihilated by the
                ! perturbation operators, if any are being used.
                if (allocated(pops_pert)) then
                    perturb_ncreate = pops_pert(1)%ncreate
                    perturb_nannihilate = pops_pert(1)%nannihilate
                else
                    perturb_ncreate = 0
                    perturb_nannihilate = 0
                end if

                call CheckPopsParams(tPop64Bit, tPopHPHF, tPopLz, iPopLenof_Sign, iPopNel, &
                                     iPopAllTotWalkers, PopDiagSft, PopSumNoatHF, PopAllSumENum, iPopIter, &
                                     PopNIfD, PopNIfSgn, PopNIfTot, &
                                     MaxWalkersUncorrected, read_tau, PopBlockingIter, read_psingles, read_pparallel, &
                                     read_ptriples, perturb_ncreate, perturb_nannihilate)

                if (iProcIndex == root) close(iunithead)
            else
                MaxWalkersUncorrected = int(InitWalkers)
            end if

            MaxWalkersPart = NINT(MemoryFacPart * MaxWalkersUncorrected)
            ExpectedMemWalk = real( (NIfTot + 1) * size_n_int + 8, dp ) * &
                real(MaxWalkersPart, dp) / 2._dp**20
            if (ExpectedMemWalk < 20.0) then
                !Increase memory allowance for small runs to a min of 20mb
                MaxWalkersPart = ceiling( 20._dp * 2._dp**20 / &
                    real((NIfTot + 1) * size_n_int + 8, dp) )
                block
                    character(*), parameter :: mem_warning = "Low memory requested for walkers, so increasing memory to 20Mb to avoid memory errors"
                    if (iProcIndex == root) write(stderr, "(A)") mem_warning
                end block
            end if
            write(stdout, "(A,I14)") "Memory allocated for a maximum particle number per node of: ", MaxWalkersPart
            !Here is where MaxSpawned is set up - do we want to set up a minimum allocation here too?
            Call SetupValidSpawned(int(InitWalkers, int64))

!Put a barrier here so all processes synchronise
            CALL MPIBarrier(error)
!Allocate memory to hold walkers
            allocate(WalkVecDets(0:NIfTot, MaxWalkersPart), stat=ierr)
            CALL LogMemAlloc('WalkVecDets', MaxWalkersPart * (NIfTot + 1), size_n_int, this_routine, WalkVecDetsTag, ierr)
            WalkVecDets(0:NIfTot, 1:MaxWalkersPart) = 0
            MemoryAlloc = (NIfTot + 1) * MaxWalkersPart * size_n_int    !Memory Allocated in bytes

            if (alloc_popsfile_dets) allocate(popsfile_dets(0:NIfTot, MaxWalkersPart), stat=ierr)

            nrdms_standard = 0
            nrdms_transition = 0

            if (tRDMOnFly) then
                if (tPairedReplicas) then
                    nrdms_standard = lenof_sign.div.2
                else
                    nrdms_standard = lenof_sign
                end if
                if (tTransitionRDMs) then
                    ! nrdms_transition_input will have been read in from the user
                    ! input. But if we have two different replicas for each state
                    ! sampled, then there are two ways to form each transition RDMs.
                    nrdms_transition = nrdms_transition_input * nreplicas
                end if
            end if

            ! Allocate storage for persistent data to be stored alongside
            ! the current determinant list (particularly diagonal matrix
            ! elements, i.e. CurrentH; now global_determinant_data).
            call init_global_det_data(nrdms_standard, nrdms_transition)

            ! If we are doing cont time, then initialise it here
            call init_cont_time()

            ! set the dummies for trial wavefunction connected space
            ! load balancing before trial wf initialization
            if (tTrialWavefunction) then
                allocate(con_send_buf(0, 0))
                con_space_size = 0
            end if

            write(stdout, "(A,I12,A)") "Spawning vectors allowing for a total of ", MaxSpawned, &
                " particles to be spawned in any one iteration per core."
            write(stdout, *) "Memory requirement ", IlutBits%len_bcast * 8.0_dp * ( &
                MaxSpawned / 1048576.0_dp), "MB"

            allocate(SpawnVec(0:IlutBits%len_bcast, MaxSpawned), &
                      stat=ierr, source=0_n_int)
block
    use constants, only: int64
    use util_mod, only: operator(.div.)
    if (ierr /= 0) then
        call stop_all(this_routine, 'Error in allocation of SpawnVec.')
    end if
call LogMemAlloc("SpawnVec", size(SpawnVec, kind=int64), int(sizeof(SpawnVec), kind=int64) .div. size(SpawnVec, kind=int64),&
    & this_routine, SpawnVecTag, ierr)
end block
            allocate(SpawnVec2(0:IlutBits%len_bcast, MaxSpawned), &
                      stat=ierr, source=0_n_int)
block
    use constants, only: int64
    use util_mod, only: operator(.div.)
    if (ierr /= 0) then
        call stop_all(this_routine, 'Error in allocation of SpawnVec2.')
    end if
call LogMemAlloc("SpawnVec2", size(SpawnVec2, kind=int64), int(sizeof(SpawnVec2), kind=int64) .div. size(SpawnVec2, kind=int64),&
    & this_routine, SpawnVec2Tag, ierr)
end block

            if (use_spawn_hash_table) then
                nhashes_spawn = ceiling(0.8 * MaxSpawned)
                allocate(spawn_ht(nhashes_spawn), stat=ierr)
                call init_hash_table(spawn_ht)
            end if

!Point at correct spawning arrays
            SpawnedParts => SpawnVec
            SpawnedParts2 => SpawnVec2

            MemoryAlloc = MemoryAlloc + (NIfTot + 1) * MaxSpawned * 2 * size_n_int

            if (tAutoAdaptiveShift) then
                allocate(SpawnInfoVec(1:SpawnInfoWidth, MaxSpawned), stat=ierr)
block
    use constants, only: int64
    use util_mod, only: operator(.div.)
    if (ierr /= 0) then
        call stop_all(this_routine, 'Error in allocation of SpawnInfoVec.')
    end if
call LogMemAlloc("SpawnInfoVec", size(SpawnInfoVec, kind=int64), int(sizeof(SpawnInfoVec), kind=int64) .div. size(SpawnInfoVec,&
    & kind=int64), this_routine, SpawnInfoVecTag, ierr)
end block
                allocate(SpawnInfoVec2(1:SpawnInfoWidth, MaxSpawned), stat=ierr)
block
    use constants, only: int64
    use util_mod, only: operator(.div.)
    if (ierr /= 0) then
        call stop_all(this_routine, 'Error in allocation of SpawnInfoVec2.')
    end if
call LogMemAlloc("SpawnInfoVec2", size(SpawnInfoVec2, kind=int64), int(sizeof(SpawnInfoVec2), kind=int64) .div.&
    & size(SpawnInfoVec2, kind=int64), this_routine, SpawnInfoVec2Tag, ierr)
end block
                SpawnInfoVec(:, :) = 0
                SpawnInfoVec2(:, :) = 0
                SpawnInfo => SpawnInfoVec
                SpawnInfo2 => SpawnInfoVec2
                MemoryAlloc = MemoryAlloc + (SpawnInfoWidth) * MaxSpawned * 2 * size_n_int
            end if

            write(stdout, "(A)") "Storing walkers in hash-table. Algorithm is now formally linear scaling with walker number"
            write(stdout, "(A,I15)") "Length of hash-table: ", nWalkerHashes
            write(stdout, "(A,F20.5)") "Length of hash-table as a fraction of targetwalkers: ", HashLengthFrac
            ! TODO: Correct the memory usage.
            !MemTemp=2*(8*(nClashMax+1)*nWalkerHashes)+8*MaxWalkersPart
            !write(stdout,"(A,F10.3,A)") "This will use ",real(MemTemp,dp)/1048576.0_dp,&
            !    " Mb of memory per process, although this is likely to increase as it expands"
            allocate(HashIndex(nWalkerHashes), stat=ierr)
            if (ierr /= 0) call stop_all(this_routine, "Error in allocation")
            do i = 1, nWalkerHashes
                HashIndex(i)%ind = 0
            end do
            !Also need to allocate memory for the freeslot array
            allocate(FreeSlot(MaxWalkersPart), stat=ierr)
            if (ierr /= 0) call stop_all(this_routine, "Error in allocation")
            freeslot(:) = 0
            !MemoryAlloc=MemoryAlloc+MemTemp

            ! Allocate pointers to the correct walker arrays
            CurrentDets => WalkVecDets

            ! Get the (0-based) processor index for the HF det.
            do run = 1, inum_runs
                iRefProc(run) = DetermineDetNode(nel, ProjEDet(:, run), 0)
            end do
            write(stdout, "(A,I8)") "Reference processor is: ", iRefProc(1)
            write(stdout, "(A)", advance='no') "Initial reference is: "
            call write_det(stdout, ProjEDet(:, 1), .true.)

            TotParts(:) = 0.0
            TotPartsOld(:) = 0.0
            NoatHF(:) = 0.0_dp
            InstNoatHF(:) = 0.0_dp

            ! Has been moved to guarantee initialization before first load balancing
            ! Initialises RDM stuff for both explicit and stochastic calculations of RDM.

            tFillingStochRDMonFly = .false.
            tFillingExplicRDMonFly = .false.
            !One of these becomes true when we have reached the relevant iteration to begin filling the RDM.

            ! initialize excitation generator
            if (t_guga_pchb) call init_guga_pchb_excitgen()

            ! If we have a popsfile, read the walkers in now.
            if (tReadPops .and. .not. tPopsAlreadyRead) then
                call InitFCIMC_pops(iPopAllTotWalkers, PopNIfSgn, iPopNel, read_nnodes, &
                                    read_walkers_on_nodes, pops_pert, &
                                    PopBalanceBLocks, PopDiagSft)
            else
                if (tStartMP1) then
                    !Initialise walkers according to mp1 amplitude.
                    call InitFCIMC_MP1()

                else if (tStartCAS) then
                    !Initialise walkers according to a CAS diagonalisation.
                    call InitFCIMC_CAS()

                else if (tTrialInit .or. (tOrthogonaliseReplicas .and. &
                                          .not. tReplicaSingleDetStart)) then

                    call InitFCIMC_trial()

                else if (tInitializeCSF) then

                    call InitFCIMC_CSF()

                else !Set up walkers on HF det

                    if (tStartSinglePart) then
                        write(stdout, "(A,I10,A,F9.3,A,I15)") "Initial number of particles set to ", int(InitialPart), &
                            " and shift will be held at ", DiagSft(1), " until particle number gets to ", int(InitWalkers * nNodes)
                    else
                        write(stdout, "(A,I16)") "Initial number of walkers per processor chosen to be: ", nint(InitWalkers)
                    end if

                    call InitFCIMC_HF()

                end if   !tStartmp1
            end if
            if (t_ueg_3_body .and. tTrcorrExgen) tLatticeGens = .false.
            write(stdout, "(A,F14.6,A)") " Initial memory (without excitgens + temp arrays) consists of : ", &
                & REAL(MemoryAlloc, dp) / 1048576.0_dp, " Mb/Processor"
            write(stdout, *) "Only one array of memory to store main particle list allocated..."
            write(stdout, *) "Initial memory allocation sucessful..."
            write(stdout, *) "============================================="
            CALL neci_flush(stdout)

        end if   !End if initial walkers method

        ! There was no last output, use the same value as for the shift update
        AllTotPartsLastOutput = AllTotPartsOld
!Put a barrier here so all processes synchronise
        CALL MPIBarrier(error)

        call init_norm()

        IF (tPrintOrbOcc) THEN
            allocate(OrbOccs(nBasis), stat=ierr)
            CALL LogMemAlloc('OrbOccs', nBasis, 8, this_routine, OrbOccsTag, ierr)
            OrbOccs(:) = 0.0_dp
        end if

        IF (tHistInitPops) THEN
            CALL InitHistInitPops()
        end if
        tPrintHighPop = .false.
        MaxInitPopPos = 0.0
        MaxInitPopNeg = 0.0

        IF (abs(MaxNoatHF) < 1.0e-12_dp) THEN
            MaxNoatHF = InitWalkers * nNodes
            HFPopThresh = int(MaxNoatHF, int64)
        end if

        ! Initialise excitation generation storage
        call init_excit_gen_store(fcimc_excit_gen_store)


        if (t_pcpp_excitgen) call init_pcpp_excitgen()

        if (t_impurity_excitgen) call setupImpurityExcitgen()
        ! [W.D.] I guess I want to initialize that before the tau-search,
        ! or otherwise some pgens get calculated incorrectly
        if (t_back_spawn .or. t_back_spawn_flex) then
            call init_back_spawn()
        end if
        ! also i should warn the user if this is a restarted run with a
        ! set delay in the back-spawning method:
        ! is there actually a use-case where someone really wants to delay
        ! a back-spawn in a restarted run?
        if (tReadPops .and. back_spawn_delay /= 0) then
            call Warning_neci(t_r, &
                              "Do you really want a delayed back-spawn in a restarted run?")
        end if

        call init_tau()

        IF ((NMCyc /= 0) .and. (tRotateOrbs .and. (.not. tFindCINatOrbs))) then
            CALL Stop_All(this_routine, "Currently not set up to rotate and then go straight into a spawning &
            & calculation.  Ordering of orbitals is incorrect.  This may be fixed if needed.")
        end if

        if (tRDMonFly) then
            call init_rdms(nrdms_standard, nrdms_transition)
            !If the iteration specified to start filling the RDM has already been, want to
            !start filling as soon as possible.
            do run = 1, inum_runs
                if (.not. tSinglePartPhase(run)) VaryShiftIter(run) = 0
            end do
        end if

        ! Perform all semi-stochastic initialisation. This includes generating all the states in the
        ! deterministic space, finding their processors, ordering them, inserting them into
        ! CurrentDets, calculating and storing all Hamiltonian matrix elements and initalising all
        ! arrays required to store and distribute the vectors in the deterministic space later.

        ! in the real-time application, this is done after the initial state is set up
        if (tSemiStochastic .and. .not. t_real_time_fciqmc) then
            if (tDynamicCoreSpace .and. tRDMonFly) then
                tSemiStochastic = .false.
                semistoch_shift_iter = 1
            else
                call init_semi_stochastic(ss_space_in, tStartedFromCoreGround)
                if (tStartedFromCoreGround .and. tSetInitialRunRef) call set_initial_run_references()
            end if
        end if

        ! If the number of trial states to calculate hasn't been set by the
        ! user, then simply use the minimum number
        if ((tTrialWavefunction .or. tStartTrialLater) .and. (ntrial_ex_calc == 0)) then
            ntrial_ex_calc = inum_runs
        end if

        ! Initialise the trial wavefunction information which can be used for the energy estimator.
        ! This includes generating the trial space, generating the space connected to the trial space,
        ! diagonalising the trial space to find the trial wavefunction and calculating the vector
        ! in the connected space, required for the energy estimator.
        if (tRDMonFly .and. tDynamicCoreSpace .and. tTrialWavefunction) then
            tTrialWavefunction = .false.
            tStartTrialLater = .true.
            trial_shift_iter = 1
        end if
        if (tTrialWavefunction) then
            if (tPairedReplicas) then
                call init_trial_wf(trial_space_in, ntrial_ex_calc, inum_runs.div.2, .true.)
            else
                call init_trial_wf(trial_space_in, ntrial_ex_calc, inum_runs, .false.)
            end if
        else if (tStartTrialLater) then
            ! If we are going to turn on the use of a trial wave function
            ! later in the calculation, then zero the trial estimate arrays
            ! for now, to prevent junk being printed before then.
            trial_numerator = 0.0_dp
            tot_trial_numerator = 0.0_dp
            trial_denom = 0.0_dp
            tot_trial_denom = 0.0_dp

            init_trial_numerator = 0.0_dp
            tot_init_trial_numerator = 0.0_dp
            init_trial_denom = 0.0_dp
            tot_init_trial_denom = 0.0_dp

            trial_num_inst = 0.0_dp
            tot_trial_num_inst = 0.0_dp
            trial_denom_inst = 0.0_dp
            tot_trial_denom_inst = 0.0_dp
        end if

        replica_overlaps_real(:, :) = 0.0_dp
#ifdef CMPLX_
        replica_overlaps_imag(:, :) = 0.0_dp
#endif

        if (t_cepa_shift) call init_cepa_shifts()

        ! Set up the reference space for the adi-approach
        ! in real-time, we do this in the real-time init
        if (.not. t_real_time_fciqmc) call setup_reference_space(tReadPops)

        ! in fixed-n0, the variable shift mode and everything connected is
        ! controlled over the reference population
        if (tFixedN0) then
            if (tReadPops .and. .not. tWalkContGrow) then
                tSkipRef = .true.
                tSinglePartPhase = .false.
            else
                tSkipRef = .false.
                tSinglePartPhase = .true.
            end if
        end if

        if (tRDMonFly .and. tDynamicCoreSpace) call sync_rdm_sampling_iter()

        ! for the (uniform) 3-body excitgen, the generation probabilities are uniquely given
        ! by the number of alpha and beta electrons and the number of orbitals
        ! and can hence be precomputed
        if (t_mol_3_body .or. t_ueg_3_body) call setup_mol_tc_excitgen()

        if (tInitiatorSpace) call init_initiator_space(i_space_in)

        if (tReplicaEstimates) then
            if (.not. tPairedReplicas) then
                call stop_all(this_routine, "The paired-replicas option must be used the logging &
                                            &block, in order to calculate replica estimates.)")
            end if

            if (tSemiStochastic) allocate (tDetermSpawnedTo( &
                                           cs_replicas(core_run)%determ_sizes(iProcIndex)))

            call open_replica_est_file()
        end if

        ! When should we perform death before communication?
        ! For integer walkers, do death before comms just so the tests don't fail (or need updating).
        if (.not. tAllRealCoeff) then
            tDeathBeforeComms = .true.
        end if
        if (t_back_spawn .or. t_back_spawn_flex) then
            tDeathBeforeComms = .true.
        end if

        ! For FCIQMC with preconditioning and a time step of 1, death will
        ! kill all walkers and remove them from the hash table. In this
        ! case, we must set the initiator flags for spawning to occupied
        ! determinants before this occurs.
        if (tPreCond .and. (tau.isclose.1.0_dp)) tSetInitFlagsBeforeDeath = .true.

        ! Make sure we are performing death *after* communication, in cases
        ! where this is essential.
        if (tPreCond .and. tDeathBeforeComms) then
            call stop_all(this_routine, "With preconditioning, death must &
                                        &be performed after communication.")
        end if
        if (tReplicaEstimates .and. tDeathBeforeComms) then
            call stop_all(this_routine, "In order to calculate replica estimates, &
                                        &death must be performed after communication.")
        end if
        if (tEN2Init .and. (.not. tTruncInitiator)) then
            call stop_all(this_routine, "Cannot calculate the EN2 correction to initiator &
                                        &error as the initiator method is not in use.")
        end if

    end subroutine InitFCIMCCalcPar