init_rdms Subroutine

public subroutine init_rdms(nrdms_standard, nrdms_transition)

Arguments

Type IntentOptional Attributes Name
integer, intent(in) :: nrdms_standard
integer, intent(in) :: nrdms_transition

Contents

Source Code


Source Code

    subroutine init_rdms(nrdms_standard, nrdms_transition)

        use DeterminantData, only: write_det
        use CalcData, only: MemoryFacPart, tEN2
        use FciMCData, only: MaxSpawned, Spawned_Parents, Spawned_Parents_Index
        use FciMCData, only: Spawned_ParentsTag, Spawned_Parents_IndexTag
        use FciMCData, only: HFDet_True, tSinglePartPhase, AvNoatHF, IterRDM_HF
        use global_det_data, only: len_av_sgn_tot, len_iter_occ_tot
        use LoggingData, only: tDo_Not_Calc_2RDM_est, RDMExcitLevel, tExplicitAllRDM
        use LoggingData, only: tDiagRDM, tDumpForcesInfo, tDipoles, tPrint1RDM
        use LoggingData, only: tReadRDMs, tPopsfile, tno_RDMs_to_read
        use LoggingData, only: twrite_RDMs_to_read, tPrint1RDMsFrom2RDMPops
        use LoggingData, only: tPrint1RDMsFromSpinfree, t_spin_resolved_rdms
        use rdm_data, only: rdm_estimates, one_rdms, two_rdm_spawn, two_rdm_main, two_rdm_recv
        use rdm_data, only: two_rdm_recv_2, tOpenShell, print_2rdm_est, Sing_ExcDjs, Doub_ExcDjs
        use rdm_data, only: Sing_ExcDjs2, Doub_ExcDjs2, Sing_ExcDjsTag, Doub_ExcDjsTag
        use rdm_data, only: Sing_ExcDjs2Tag, Doub_ExcDjs2Tag, OneEl_Gap, TwoEl_Gap
        use rdm_data, only: Sing_InitExcSlots, Doub_InitExcSlots, Sing_ExcList, Doub_ExcList
        use rdm_data, only: nElRDM_Time, FinaliseRDMs_time, RDMEnergy_time, states_for_transition_rdm
        use rdm_data, only: rdm_main_size_fac, rdm_spawn_size_fac, rdm_recv_size_fac
        use rdm_data, only: rdm_definitions, en_pert_main, inits_estimates, tOpenSpatialOrbs
        use rdm_data_utils, only: init_rdm_spawn_t, init_rdm_list_t, init_one_rdm_t
        use rdm_data_utils, only: init_rdm_definitions_t, clear_one_rdms, clear_rdm_list_t
        use rdm_data_utils, only: init_en_pert_t
        use rdm_estimators, only: init_rdm_estimates_t, calc_2rdm_estimates_wrapper
        use RotateOrbsData, only: SymLabelCounts2_rot, SymLabelList2_rot, SymLabelListInv_rot
        use RotateOrbsData, only: SymLabelCounts2_rotTag, SymLabelList2_rotTag, NoOrbs
        use RotateOrbsData, only: SymLabelListInv_rotTag, SpatOrbs, NoSymLabelCounts
        use SystemData, only: tStoreSpinOrbs, tHPHF, tFixLz, iMaxLz, tROHF

        integer, intent(in) :: nrdms_standard, nrdms_transition

        integer :: nrdms
        integer :: rdm_nrows, nhashes_rdm_main, nhashes_rdm_spawn
        integer :: standard_spawn_size, min_spawn_size
        integer :: max_nelems_main, max_nelems_spawn, max_nelems_recv, max_nelems_recv_2
        integer(int64) :: memory_alloc, main_mem, spawn_mem, recv_mem
        integer :: ndets_en_pert, nhashes_en_pert
        integer :: irdm, iproc, ierr
        character(len=*), parameter :: t_r = 'init_rdms'

#ifdef CMPLX_
        call stop_all(t_r, 'Filling of reduced density matrices not working with complex walkers yet.')
#endif

        nrdms = nrdms_standard + nrdms_transition

        ! Only spatial orbitals for the 2-RDMs (and F12).
        if (tStoreSpinOrbs .and. RDMExcitLevel /= 1) then
            call stop_all(t_r, '2-RDM calculations not set up for systems stored as spin orbitals.')
        end if

        if (tROHF .or. tStoreSpinOrbs .or. t_spin_resolved_rdms) then
            tOpenShell = .true.
        else
            tOpenShell = .false.
        end if
        ! it is possible to have open-shell systems with spatial orbitals,
        ! these have to be indexed differently
        tOpenSpatialOrbs = tOpenShell .and. .not. tStoreSpinOrbs

        if (tExplicitAllRDM) then
            write(stdout, '(1X,"Explicitly calculating the reduced density matrices from the FCIQMC wavefunction.")')
        else
            write(stdout, '(1X,"Stochastically calculating the reduced density matrices from the FCIQMC wavefunction")')
            write(stdout, '(1X,"incl. explicit connections to the following HF determinant:")', advance='no')
            call write_det(stdout, HFDet_True, .true.)
        end if

        if (RDMExcitLevel == 1) then
            print_2rdm_est = .false.
        else
            ! If the RDMExcitLevel is 2 or 3 - and we're calculating the 2-RDM,
            ! then we automatically calculate the energy (and other estimates!)
            ! unless we're specifically told not to.
            if (tDo_Not_Calc_2RDM_est) then
                print_2rdm_est = .false.
            else
                print_2rdm_est = .true.
                write (stdout, '(1X,"Calculating the energy from the reduced density matrix. &
                              &This requires the 2 electron RDM from which the 1-RDM can also be constructed.")')
            end if
        end if

        call init_rdm_definitions_t(rdm_definitions, nrdms_standard, nrdms_transition, states_for_transition_rdm)
        if (tinitsRDM) call init_rdm_definitions_t( &
            rdm_inits_defs, nrdms_standard, nrdms_transition, states_for_transition_rdm, 'Inits_TwoRDM')

        ! Allocate arrays for holding averaged signs and block lengths for the
        ! HF determinant.
        allocate(AvNoatHF(len_av_sgn_tot))
        AvNoatHF = 0.0_dp
        allocate(IterRDM_HF(len_iter_occ_tot))
        IterRDM_HF = 0

        ! Have not got HPHF working with the explicit or truncated methods yet.
        ! Neither of these would be too difficult to implement.
        if (tHPHF .and. tExplicitAllRDM) call stop_all(t_r, 'HPHF not set up with the explicit calculation of the RDM.')

        if (tDipoles) then
            write (stdout, '("WARNING - The calculation of dipole moments is currently not supported for the new RDM code. &
                      &Use the OLDRDMS option to use feature.")')
        end if

        SpatOrbs = nbasis / 2
        if (tOpenShell) then
            NoOrbs = nbasis
        else
            NoOrbs = SpatOrbs
        end if

        ! The memory of (large) alloctaed arrays, per MPI process.
        memory_alloc = 0_int64

        ! For now, create RDM array big enough so that *all* RDM elements on
        ! a particular processor can be stored, using the usual approximations
        ! to take symmetry into account. Include a factor of 1.5 to account for
        ! factors such as imperfect load balancing (which affects the spawned
        ! array).
        rdm_nrows = nbasis * (nbasis - 1) / 2
        max_nelems_main = int(1.5_dp * real(rdm_nrows**2, dp) / (8._dp * nProcessors) * rdm_main_size_fac)
        nhashes_rdm_main = int(0.75 * max_nelems_main * rdm_main_size_fac)

        main_mem = max_nelems_main * (nrdms + 1) * size_int_rdm
        if (tinitsRDM) main_mem = 2 * main_mem
        write(stdout, '(/,1X,"About to allocate main RDM array, size per MPI process (MB):", f14.6)') real(main_mem, dp) / 1048576.0_dp
        call init_rdm_list_t(two_rdm_main, nrdms, max_nelems_main, nhashes_rdm_main)
        if (tinitsRDM) then
            call init_rdm_list_t(two_rdm_inits, nrdms, max_nelems_main, nhashes_rdm_main)
        end if
        write(stdout, '(1X,"Allocation of main RDM array complete.")')

        ! Factor of 10 over perfectly distributed size, for some safety.
        standard_spawn_size = int(10_int64 * rdm_nrows**2_int64 / (8_int64 * nProcessors))
        ! For cases where we have a small number of orbitals but large number
        ! of processors (i.e., large CASSCF calculations), we may find the
        ! above standard_spawn_size is less than nProcessors. Thus, there
        ! would not be at least one spawning slot per processor. In such cases
        ! make sure that we have at least 50 per processor, for some safety.
        min_spawn_size = int(50_int64 * nProcessors)
        max_nelems_spawn = max(standard_spawn_size, min_spawn_size) * int(rdm_spawn_size_fac)
        nhashes_rdm_spawn = int(0.75 * max_nelems_spawn * rdm_spawn_size_fac)

        spawn_mem = max_nelems_spawn * (nrdms + 1) * size_int_rdm
        if (tinitsRDM) spawn_mem = 2 * spawn_mem
        write(stdout, '(1X,"About to allocate RDM spawning array, size per MPI process (MB):", f14.6)') real(spawn_mem, dp) / 1048576.0_dp
        call init_rdm_spawn_t(two_rdm_spawn, rdm_nrows, nrdms, max_nelems_spawn, nhashes_rdm_spawn)
        if (tinitsRDM) then
            call init_rdm_spawn_t(two_rdm_inits_spawn, rdm_nrows, nrdms, max_nelems_spawn, &
                                  nhashes_rdm_spawn)
        end if
        write(stdout, '(1X,"Allocation of RDM spawning array complete.")')

        max_nelems_recv = 4 * rdm_nrows**2 / (8 * nProcessors) * int(rdm_recv_size_fac)
        max_nelems_recv_2 = 2 * rdm_nrows**2 / (8 * nProcessors) * int(rdm_recv_size_fac)

        recv_mem = (max_nelems_recv + max_nelems_recv_2) * (nrdms + 1) * size_int_rdm
        write(stdout, '(1X,"About to allocate RDM receiving arrays, size per MPI process (MB):", f14.6)') real(recv_mem, dp) / 1048576.0_dp
        ! Don't need the hash table for the received list, so pass 0 for nhashes.
        call init_rdm_list_t(two_rdm_recv, nrdms, max_nelems_recv, 0)
        call init_rdm_list_t(two_rdm_recv_2, nrdms, max_nelems_recv_2, 0)
        write(stdout, '(1X,"Allocation of RDM receiving arrays complete.",/)')

        ! Count the memory the various RDM lists (but this does *not* count
        ! the memory of the hash tables - this will increase dynamically
        ! throughout the simulation).
        memory_alloc = memory_alloc + main_mem + spawn_mem + recv_mem

        call init_rdm_estimates_t(rdm_estimates, nrdms_standard, nrdms_transition, print_2rdm_est)
        if (tOutputInitsRDM .or. tInitsRDMRef) &
            call init_rdm_estimates_t(inits_estimates, nrdms_standard, &
                                      nrdms_transition, print_2rdm_est, 'InitsRDMEstimates')

        ! Initialise 1-RDM objects.
        if (RDMExcitLevel == 1 .or. RDMExcitLevel == 3 .or. &
            tDiagRDM .or. tPrint1RDM .or. tDumpForcesInfo .or. tDipoles) then

            allocate(one_rdms(nrdms), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating one_rdms array.')

            if (tinitsRDM) then
                allocate(inits_one_rdms(nrdms), stat=ierr)
                if (ierr /= 0) call stop_all(t_r, 'Problem allocating one_rdms array.')
            end if
            do irdm = 1, nrdms
                call init_one_rdm_t(one_rdms(irdm), NoOrbs)

                memory_alloc = memory_alloc + (NoOrbs * NoOrbs * 8)
                if (tinitsRDM) then
                    call init_one_rdm_t(inits_one_rdms(irdm), NoOrbs)
                    memory_alloc = memory_alloc + (NoOrbs * NoOrbs * 8)
                end if
            end do
        end if

        if (tEN2) then
            ! Initialise Epstein-Nesbet perturbation object.
            ndets_en_pert = MaxSpawned
            nhashes_en_pert = int(0.8 * MaxSpawned)
            call init_en_pert_t(en_pert_main, nrdms_standard, ndets_en_pert, nhashes_en_pert)
        end if

        ! We then need to allocate the arrays for excitations etc when doing
        ! the explicit all calculation.
        if (tExplicitAllRDM) then
            ! We always calculate the single stuff - and if RDMExcitLevel is 1,
            ! this is all, otherwise calculate the double stuff too.

            ! This array actually contains the excitations in blocks of the
            ! processor they will be sent to. Only needed if the 1-RDM is the
            ! only thing being calculated.
            if (tGUGA) then
                ! in the GUGA code it would be better to encode the
                ! matrix element, and maybe also the RDM index in the
                ! Sing_ExcDjs array, so we do not have to recalculate it!
                ! so we need nifguga entries or? one for the matrix element
                ! and i can use the delta-b storage to encode the RDM ind
                allocate(Sing_ExcDjs(0:nifguga, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
                allocate(Sing_ExcDjs2(0:nifguga, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
            else
                allocate(Sing_ExcDjs(0:NIfTot, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
                allocate(Sing_ExcDjs2(0:NIfTot, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
            end if
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_ExcDjs array.')
            call LogMemAlloc('Sing_ExcDjs', nint(nel * nbasis * MemoryFacPart) * (NIfTot + 1), &
                             size_n_int, t_r, Sing_ExcDjsTag, ierr)

            if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_ExcDjs2 array.')
            call LogMemAlloc('Sing_ExcDjs2', nint(nel * nbasis * MemoryFacPart) * (NIfTot + 1), &
                             size_n_int, t_r, Sing_ExcDjs2Tag, ierr)

            Sing_ExcDjs(:, :) = 0
            Sing_ExcDjs2(:, :) = 0

            memory_alloc = memory_alloc + ((NIfTot + 1) * nint((nel * nbasis) * MemoryFacPart) * size_n_int * 2)

            ! We need room to potentially generate N*M single excitations but
            ! these will be spread across each processor.
            OneEl_Gap = (real(nel, dp) * real(nbasis, dp) * MemoryFacPart) / real(nProcessors, dp)

            ! This array contains the initial positions of the excitations
            ! for each processor.
            allocate(Sing_InitExcSlots(0:(nProcessors - 1)), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_InitExcSlots array,')
            do iproc = 0, nProcessors - 1
                Sing_InitExcSlots(iproc) = nint(OneEl_Gap * iproc) + 1
            end do

            ! This array contains the current position of the excitations as
            ! they're added.
            allocate(Sing_ExcList(0:(nProcessors - 1)), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_ExcList array,')
            Sing_ExcList(:) = Sing_InitExcSlots(:)

            if (RDMExcitLevel /= 1) then
                ! This array actually contains the excitations in blocks of
                ! the processor they will be sent to.
                if (tGUGA) then
                    allocate(Doub_ExcDjs(0:nifguga, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
                    allocate(Doub_ExcDjs2(0:nifguga, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
                else
                    allocate(Doub_ExcDjs(0:NIfTot, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
                    allocate(Doub_ExcDjs2(0:NIfTot, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
                end if
                if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_ExcDjs array.')
                call LogMemAlloc('Doub_ExcDjs', nint(((nel * nbasis)**2) * MemoryFacPart) &
                                 * (NIfTot + 1), size_n_int, t_r, Doub_ExcDjsTag, ierr)

                if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_ExcDjs2 array.')
                call LogMemAlloc('Doub_ExcDjs2', nint(((nel * nbasis)**2) * MemoryFacPart) &
                                 * (NIfTot + 1), size_n_int, t_r, Doub_ExcDjs2Tag, ierr)

                memory_alloc = memory_alloc + ((NIfTot + 1) * nint(((nel * nbasis)**2) * MemoryFacPart) * size_n_int * 2)

                ! We need room to potentially generate (N*M)^2 double excitations
                ! but these will be spread across each processor.
                TwoEl_Gap = (((real(nel, dp) * real(nbasis, dp))**2) * MemoryFacPart) / real(nProcessors, dp)

                Doub_ExcDjs(:, :) = 0
                Doub_ExcDjs2(:, :) = 0

                ! This array contains the initial positions of the excitations
                ! for each processor.
                allocate(Doub_InitExcSlots(0:(nProcessors - 1)), stat=ierr)
                if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_InitExcSlots array,')
                do iproc = 0, nProcessors - 1
                    Doub_InitExcSlots(iproc) = nint(TwoEl_Gap * iproc) + 1
                end do

                ! This array contains the current position of the excitations
                ! as they're added.
                allocate(Doub_ExcList(0:(nProcessors - 1)), stat=ierr)
                if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_ExcList array,')
                Doub_ExcList(:) = Doub_InitExcSlots(:)
            end if

        else

            ! Finally, we need to hold onto the parents of the spawned particles.
            ! This is not necessary if we're doing completely explicit calculations.
            ! WD: maybe I have to change this for the GUGA implementation..
            allocate(Spawned_Parents(0:IlutBitsParent%len_tot, MaxSpawned), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating Spawned_Parents array,')
            call LogMemAlloc('Spawned_Parents', MaxSpawned * IlutBitsParent%len_tot, &
                             size_n_int, t_r, Spawned_ParentsTag, ierr)
            allocate(Spawned_Parents_Index(2, MaxSpawned), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating Spawned_Parents_Index array,')
            call LogMemAlloc('Spawned_Parents_Index', MaxSpawned * 2, 4, t_r, &
                             Spawned_Parents_IndexTag, ierr)

            memory_alloc = memory_alloc + ((NIfTot + 3) * MaxSpawned * size_n_int)
            memory_alloc = memory_alloc + (2 * MaxSpawned * 4)

        end if

        if (iProcIndex == 0) then
            write(stdout, "(A,F14.6,A,F14.6,A)") " Main RDM memory arrays consists of: ", &
                real(memory_alloc, dp) / 1048576.0_dp, " MB per MPI process."
        end if

        ! These parameters are set for the set up of the symmetry arrays, which
        ! are later used for the diagonalisation / rotation of the 1-RDMs.

        if (tOpenShell) then
            if (tFixLz) then
                NoSymLabelCounts = 16 * ((2 * iMaxLz) + 1)
            else
                NoSymLabelCounts = 16
            end if
        else
            if (tFixLz) then
                NoSymLabelCounts = 8 * ((2 * iMaxLz) + 1)
            else
                NoSymLabelCounts = 8
            end if
        end if

        if (RDMExcitLevel == 1 .or. RDMExcitLevel == 3 .or. tDiagRDM .or. tPrint1RDM .or. tDumpForcesInfo .or. tDipoles) then
            ! These arrays contain indexing systems to order the 1-RDM orbitals
            ! in terms of symmetry. This allows the diagonalisation of the RDMs
            ! to be done in symmetry blocks (a lot quicker/easier).

            if (.not. allocated(SymLabelCounts2_rot)) allocate(SymLabelCounts2_rot(2, NoSymLabelCounts), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating SymLabelCounts2_rot array,')
            call LogMemAlloc('SymLabelCounts2_rot', 2 * NoSymLabelCounts, 4, t_r, SymLabelCounts2_rotTag, ierr)
            SymLabelCounts2_rot = 0

            allocate(SymLabelList2_rot(NoOrbs), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating SymLabelList2_rot array,')
            call LogMemAlloc('SymLabelList2_rot', NoOrbs, 4, t_r, SymLabelList2_rotTag, ierr)
            SymLabelList2_rot = 0

            allocate(SymLabelListInv_rot(NoOrbs), stat=ierr)
            if (ierr /= 0) call stop_all(t_r, 'Problem allocating SymLabelListInv_rot array,')
            call LogMemAlloc('SymLabelListInv_rot', NoOrbs, 4, t_r, SymLabelListInv_rotTag, ierr)
            SymLabelListInv_rot = 0

            ! This routine actually sets up the symmetry labels for the 1-RDM.
            call SetUpSymLabels_RDM()
        end if

        if (tPrint1RDMsFromSpinfree) then
            if (tGUGA) then
                call stop_all(t_r, "check if 'print-1rdms-from-spinfree' &
                   & works with GUGA")
            end if
            call read_spinfree_2rdm_files(rdm_definitions, two_rdm_main, two_rdm_spawn)
            call print_1rdms_from_sf2rdms_wrapper(rdm_definitions, one_rdms, two_rdm_main)
            ! now clear these objects before the main simulation.
            call clear_one_rdms(one_rdms)
            call clear_rdm_list_t(two_rdm_main)
        end if

        if (tReadRDMs) then
            if (tGUGA) then
                call stop_all(t_r, "check if reading RDMs works with GUGA")
            end if
            if (RDMExcitLevel == 1) then
                do irdm = 1, size(one_rdms)
                    call read_1rdm(rdm_definitions, one_rdms(irdm), irdm)
                end do
            else
                call read_2rdm_popsfile(two_rdm_main, two_rdm_spawn)
                if (print_2rdm_est) call calc_2rdm_estimates_wrapper(rdm_definitions, rdm_estimates, two_rdm_main, en_pert_main)

                if (tPrint1RDMsFrom2RDMPops) then
                    call print_1rdms_from_2rdms_wrapper(rdm_definitions, one_rdms, two_rdm_main, tOpenShell)
                end if
            end if

            if (any(tSinglePartPhase)) then
                write (stdout, '("WARNING - Asking to read in the RDMs, but not varying shift from the beginning of &
                          &the calculation. All RDMs just read in will be zeroed, to prevent invalid averaging.")')
                ! Clear these objects, before the main simulation, since we
                ! haven't started averaging RDMs yet.
                call clear_one_rdms(one_rdms)
                call clear_rdm_list_t(two_rdm_main)
                ! Turn off tReadRDMs, since the read in RDMs aren't being
                ! used. Leaving it on affects some other stuff later.
                tReadRDMs = .false.
            end if
        end if

        ! By default, if we're writing out a popsfile (and doing an RDM
        ! calculation), we also write out the unnormalised RDMs that can be
        ! read in when restarting a calculation. If the NORDMSTOREAD option
        ! is on, these wont be printed.
        if (tPopsfile .and. (.not. tno_RDMs_to_read)) twrite_RDMs_to_read = .true.

        if (iProcIndex == 0) write(stdout, '(1X,"RDM memory allocation complete.",/)')

        nElRDM_Time%timer_name = 'nElRDMTime'
        FinaliseRDMs_Time%timer_name = 'FinaliseRDMsTime'
        RDMEnergy_Time%timer_name = 'RDMEnergyTime'

    end subroutine init_rdms