subroutine init_rdms(nrdms_standard, nrdms_transition)
use DeterminantData, only: write_det
use CalcData, only: MemoryFacPart, tEN2
use FciMCData, only: MaxSpawned, Spawned_Parents, Spawned_Parents_Index
use FciMCData, only: Spawned_ParentsTag, Spawned_Parents_IndexTag
use FciMCData, only: HFDet_True, tSinglePartPhase, AvNoatHF, IterRDM_HF
use global_det_data, only: len_av_sgn_tot, len_iter_occ_tot
use LoggingData, only: tDo_Not_Calc_2RDM_est, RDMExcitLevel, tExplicitAllRDM
use LoggingData, only: tDiagRDM, tDumpForcesInfo, tDipoles, tPrint1RDM
use LoggingData, only: tReadRDMs, tPopsfile, tno_RDMs_to_read
use LoggingData, only: twrite_RDMs_to_read, tPrint1RDMsFrom2RDMPops
use LoggingData, only: tPrint1RDMsFromSpinfree, t_spin_resolved_rdms
use rdm_data, only: rdm_estimates, one_rdms, two_rdm_spawn, two_rdm_main, two_rdm_recv
use rdm_data, only: two_rdm_recv_2, tOpenShell, print_2rdm_est, Sing_ExcDjs, Doub_ExcDjs
use rdm_data, only: Sing_ExcDjs2, Doub_ExcDjs2, Sing_ExcDjsTag, Doub_ExcDjsTag
use rdm_data, only: Sing_ExcDjs2Tag, Doub_ExcDjs2Tag, OneEl_Gap, TwoEl_Gap
use rdm_data, only: Sing_InitExcSlots, Doub_InitExcSlots, Sing_ExcList, Doub_ExcList
use rdm_data, only: nElRDM_Time, FinaliseRDMs_time, RDMEnergy_time, states_for_transition_rdm
use rdm_data, only: rdm_main_size_fac, rdm_spawn_size_fac, rdm_recv_size_fac
use rdm_data, only: rdm_definitions, en_pert_main, inits_estimates, tOpenSpatialOrbs
use rdm_data_utils, only: init_rdm_spawn_t, init_rdm_list_t, init_one_rdm_t
use rdm_data_utils, only: init_rdm_definitions_t, clear_one_rdms, clear_rdm_list_t
use rdm_data_utils, only: init_en_pert_t
use rdm_estimators, only: init_rdm_estimates_t, calc_2rdm_estimates_wrapper
use RotateOrbsData, only: SymLabelCounts2_rot, SymLabelList2_rot, SymLabelListInv_rot
use RotateOrbsData, only: SymLabelCounts2_rotTag, SymLabelList2_rotTag, NoOrbs
use RotateOrbsData, only: SymLabelListInv_rotTag, SpatOrbs, NoSymLabelCounts
use SystemData, only: tStoreSpinOrbs, tHPHF, tFixLz, iMaxLz, tROHF
integer, intent(in) :: nrdms_standard, nrdms_transition
integer :: nrdms
integer :: rdm_nrows, nhashes_rdm_main, nhashes_rdm_spawn
integer :: standard_spawn_size, min_spawn_size
integer :: max_nelems_main, max_nelems_spawn, max_nelems_recv, max_nelems_recv_2
integer(int64) :: memory_alloc, main_mem, spawn_mem, recv_mem
integer :: ndets_en_pert, nhashes_en_pert
integer :: irdm, iproc, ierr
character(len=*), parameter :: t_r = 'init_rdms'
#ifdef CMPLX_
call stop_all(t_r, 'Filling of reduced density matrices not working with complex walkers yet.')
#endif
nrdms = nrdms_standard + nrdms_transition
! Only spatial orbitals for the 2-RDMs (and F12).
if (tStoreSpinOrbs .and. RDMExcitLevel /= 1) then
call stop_all(t_r, '2-RDM calculations not set up for systems stored as spin orbitals.')
end if
if (tROHF .or. tStoreSpinOrbs .or. t_spin_resolved_rdms) then
tOpenShell = .true.
else
tOpenShell = .false.
end if
! it is possible to have open-shell systems with spatial orbitals,
! these have to be indexed differently
tOpenSpatialOrbs = tOpenShell .and. .not. tStoreSpinOrbs
if (tExplicitAllRDM) then
write(stdout, '(1X,"Explicitly calculating the reduced density matrices from the FCIQMC wavefunction.")')
else
write(stdout, '(1X,"Stochastically calculating the reduced density matrices from the FCIQMC wavefunction")')
write(stdout, '(1X,"incl. explicit connections to the following HF determinant:")', advance='no')
call write_det(stdout, HFDet_True, .true.)
end if
if (RDMExcitLevel == 1) then
print_2rdm_est = .false.
else
! If the RDMExcitLevel is 2 or 3 - and we're calculating the 2-RDM,
! then we automatically calculate the energy (and other estimates!)
! unless we're specifically told not to.
if (tDo_Not_Calc_2RDM_est) then
print_2rdm_est = .false.
else
print_2rdm_est = .true.
write (stdout, '(1X,"Calculating the energy from the reduced density matrix. &
&This requires the 2 electron RDM from which the 1-RDM can also be constructed.")')
end if
end if
call init_rdm_definitions_t(rdm_definitions, nrdms_standard, nrdms_transition, states_for_transition_rdm)
if (tinitsRDM) call init_rdm_definitions_t( &
rdm_inits_defs, nrdms_standard, nrdms_transition, states_for_transition_rdm, 'Inits_TwoRDM')
! Allocate arrays for holding averaged signs and block lengths for the
! HF determinant.
allocate(AvNoatHF(len_av_sgn_tot))
AvNoatHF = 0.0_dp
allocate(IterRDM_HF(len_iter_occ_tot))
IterRDM_HF = 0
! Have not got HPHF working with the explicit or truncated methods yet.
! Neither of these would be too difficult to implement.
if (tHPHF .and. tExplicitAllRDM) call stop_all(t_r, 'HPHF not set up with the explicit calculation of the RDM.')
if (tDipoles) then
write (stdout, '("WARNING - The calculation of dipole moments is currently not supported for the new RDM code. &
&Use the OLDRDMS option to use feature.")')
end if
SpatOrbs = nbasis / 2
if (tOpenShell) then
NoOrbs = nbasis
else
NoOrbs = SpatOrbs
end if
! The memory of (large) alloctaed arrays, per MPI process.
memory_alloc = 0_int64
! For now, create RDM array big enough so that *all* RDM elements on
! a particular processor can be stored, using the usual approximations
! to take symmetry into account. Include a factor of 1.5 to account for
! factors such as imperfect load balancing (which affects the spawned
! array).
rdm_nrows = nbasis * (nbasis - 1) / 2
max_nelems_main = int(1.5_dp * real(rdm_nrows**2, dp) / (8._dp * nProcessors) * rdm_main_size_fac)
nhashes_rdm_main = int(0.75 * max_nelems_main * rdm_main_size_fac)
main_mem = max_nelems_main * (nrdms + 1) * size_int_rdm
if (tinitsRDM) main_mem = 2 * main_mem
write(stdout, '(/,1X,"About to allocate main RDM array, size per MPI process (MB):", f14.6)') real(main_mem, dp) / 1048576.0_dp
call init_rdm_list_t(two_rdm_main, nrdms, max_nelems_main, nhashes_rdm_main)
if (tinitsRDM) then
call init_rdm_list_t(two_rdm_inits, nrdms, max_nelems_main, nhashes_rdm_main)
end if
write(stdout, '(1X,"Allocation of main RDM array complete.")')
! Factor of 10 over perfectly distributed size, for some safety.
standard_spawn_size = int(10_int64 * rdm_nrows**2_int64 / (8_int64 * nProcessors))
! For cases where we have a small number of orbitals but large number
! of processors (i.e., large CASSCF calculations), we may find the
! above standard_spawn_size is less than nProcessors. Thus, there
! would not be at least one spawning slot per processor. In such cases
! make sure that we have at least 50 per processor, for some safety.
min_spawn_size = int(50_int64 * nProcessors)
max_nelems_spawn = max(standard_spawn_size, min_spawn_size) * int(rdm_spawn_size_fac)
nhashes_rdm_spawn = int(0.75 * max_nelems_spawn * rdm_spawn_size_fac)
spawn_mem = max_nelems_spawn * (nrdms + 1) * size_int_rdm
if (tinitsRDM) spawn_mem = 2 * spawn_mem
write(stdout, '(1X,"About to allocate RDM spawning array, size per MPI process (MB):", f14.6)') real(spawn_mem, dp) / 1048576.0_dp
call init_rdm_spawn_t(two_rdm_spawn, rdm_nrows, nrdms, max_nelems_spawn, nhashes_rdm_spawn)
if (tinitsRDM) then
call init_rdm_spawn_t(two_rdm_inits_spawn, rdm_nrows, nrdms, max_nelems_spawn, &
nhashes_rdm_spawn)
end if
write(stdout, '(1X,"Allocation of RDM spawning array complete.")')
max_nelems_recv = 4 * rdm_nrows**2 / (8 * nProcessors) * int(rdm_recv_size_fac)
max_nelems_recv_2 = 2 * rdm_nrows**2 / (8 * nProcessors) * int(rdm_recv_size_fac)
recv_mem = (max_nelems_recv + max_nelems_recv_2) * (nrdms + 1) * size_int_rdm
write(stdout, '(1X,"About to allocate RDM receiving arrays, size per MPI process (MB):", f14.6)') real(recv_mem, dp) / 1048576.0_dp
! Don't need the hash table for the received list, so pass 0 for nhashes.
call init_rdm_list_t(two_rdm_recv, nrdms, max_nelems_recv, 0)
call init_rdm_list_t(two_rdm_recv_2, nrdms, max_nelems_recv_2, 0)
write(stdout, '(1X,"Allocation of RDM receiving arrays complete.",/)')
! Count the memory the various RDM lists (but this does *not* count
! the memory of the hash tables - this will increase dynamically
! throughout the simulation).
memory_alloc = memory_alloc + main_mem + spawn_mem + recv_mem
call init_rdm_estimates_t(rdm_estimates, nrdms_standard, nrdms_transition, print_2rdm_est)
if (tOutputInitsRDM .or. tInitsRDMRef) &
call init_rdm_estimates_t(inits_estimates, nrdms_standard, &
nrdms_transition, print_2rdm_est, 'InitsRDMEstimates')
! Initialise 1-RDM objects.
if (RDMExcitLevel == 1 .or. RDMExcitLevel == 3 .or. &
tDiagRDM .or. tPrint1RDM .or. tDumpForcesInfo .or. tDipoles) then
allocate(one_rdms(nrdms), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating one_rdms array.')
if (tinitsRDM) then
allocate(inits_one_rdms(nrdms), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating one_rdms array.')
end if
do irdm = 1, nrdms
call init_one_rdm_t(one_rdms(irdm), NoOrbs)
memory_alloc = memory_alloc + (NoOrbs * NoOrbs * 8)
if (tinitsRDM) then
call init_one_rdm_t(inits_one_rdms(irdm), NoOrbs)
memory_alloc = memory_alloc + (NoOrbs * NoOrbs * 8)
end if
end do
end if
if (tEN2) then
! Initialise Epstein-Nesbet perturbation object.
ndets_en_pert = MaxSpawned
nhashes_en_pert = int(0.8 * MaxSpawned)
call init_en_pert_t(en_pert_main, nrdms_standard, ndets_en_pert, nhashes_en_pert)
end if
! We then need to allocate the arrays for excitations etc when doing
! the explicit all calculation.
if (tExplicitAllRDM) then
! We always calculate the single stuff - and if RDMExcitLevel is 1,
! this is all, otherwise calculate the double stuff too.
! This array actually contains the excitations in blocks of the
! processor they will be sent to. Only needed if the 1-RDM is the
! only thing being calculated.
if (tGUGA) then
! in the GUGA code it would be better to encode the
! matrix element, and maybe also the RDM index in the
! Sing_ExcDjs array, so we do not have to recalculate it!
! so we need nifguga entries or? one for the matrix element
! and i can use the delta-b storage to encode the RDM ind
allocate(Sing_ExcDjs(0:nifguga, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
allocate(Sing_ExcDjs2(0:nifguga, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
else
allocate(Sing_ExcDjs(0:NIfTot, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
allocate(Sing_ExcDjs2(0:NIfTot, nint((nel * nbasis) * MemoryFacPart)), stat=ierr)
end if
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_ExcDjs array.')
call LogMemAlloc('Sing_ExcDjs', nint(nel * nbasis * MemoryFacPart) * (NIfTot + 1), &
size_n_int, t_r, Sing_ExcDjsTag, ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_ExcDjs2 array.')
call LogMemAlloc('Sing_ExcDjs2', nint(nel * nbasis * MemoryFacPart) * (NIfTot + 1), &
size_n_int, t_r, Sing_ExcDjs2Tag, ierr)
Sing_ExcDjs(:, :) = 0
Sing_ExcDjs2(:, :) = 0
memory_alloc = memory_alloc + ((NIfTot + 1) * nint((nel * nbasis) * MemoryFacPart) * size_n_int * 2)
! We need room to potentially generate N*M single excitations but
! these will be spread across each processor.
OneEl_Gap = (real(nel, dp) * real(nbasis, dp) * MemoryFacPart) / real(nProcessors, dp)
! This array contains the initial positions of the excitations
! for each processor.
allocate(Sing_InitExcSlots(0:(nProcessors - 1)), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_InitExcSlots array,')
do iproc = 0, nProcessors - 1
Sing_InitExcSlots(iproc) = nint(OneEl_Gap * iproc) + 1
end do
! This array contains the current position of the excitations as
! they're added.
allocate(Sing_ExcList(0:(nProcessors - 1)), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Sing_ExcList array,')
Sing_ExcList(:) = Sing_InitExcSlots(:)
if (RDMExcitLevel /= 1) then
! This array actually contains the excitations in blocks of
! the processor they will be sent to.
if (tGUGA) then
allocate(Doub_ExcDjs(0:nifguga, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
allocate(Doub_ExcDjs2(0:nifguga, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
else
allocate(Doub_ExcDjs(0:NIfTot, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
allocate(Doub_ExcDjs2(0:NIfTot, nint(((nel * nbasis)**2) * MemoryFacPart)), stat=ierr)
end if
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_ExcDjs array.')
call LogMemAlloc('Doub_ExcDjs', nint(((nel * nbasis)**2) * MemoryFacPart) &
* (NIfTot + 1), size_n_int, t_r, Doub_ExcDjsTag, ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_ExcDjs2 array.')
call LogMemAlloc('Doub_ExcDjs2', nint(((nel * nbasis)**2) * MemoryFacPart) &
* (NIfTot + 1), size_n_int, t_r, Doub_ExcDjs2Tag, ierr)
memory_alloc = memory_alloc + ((NIfTot + 1) * nint(((nel * nbasis)**2) * MemoryFacPart) * size_n_int * 2)
! We need room to potentially generate (N*M)^2 double excitations
! but these will be spread across each processor.
TwoEl_Gap = (((real(nel, dp) * real(nbasis, dp))**2) * MemoryFacPart) / real(nProcessors, dp)
Doub_ExcDjs(:, :) = 0
Doub_ExcDjs2(:, :) = 0
! This array contains the initial positions of the excitations
! for each processor.
allocate(Doub_InitExcSlots(0:(nProcessors - 1)), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_InitExcSlots array,')
do iproc = 0, nProcessors - 1
Doub_InitExcSlots(iproc) = nint(TwoEl_Gap * iproc) + 1
end do
! This array contains the current position of the excitations
! as they're added.
allocate(Doub_ExcList(0:(nProcessors - 1)), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Doub_ExcList array,')
Doub_ExcList(:) = Doub_InitExcSlots(:)
end if
else
! Finally, we need to hold onto the parents of the spawned particles.
! This is not necessary if we're doing completely explicit calculations.
! WD: maybe I have to change this for the GUGA implementation..
allocate(Spawned_Parents(0:IlutBitsParent%len_tot, MaxSpawned), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Spawned_Parents array,')
call LogMemAlloc('Spawned_Parents', MaxSpawned * IlutBitsParent%len_tot, &
size_n_int, t_r, Spawned_ParentsTag, ierr)
allocate(Spawned_Parents_Index(2, MaxSpawned), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating Spawned_Parents_Index array,')
call LogMemAlloc('Spawned_Parents_Index', MaxSpawned * 2, 4, t_r, &
Spawned_Parents_IndexTag, ierr)
memory_alloc = memory_alloc + ((NIfTot + 3) * MaxSpawned * size_n_int)
memory_alloc = memory_alloc + (2 * MaxSpawned * 4)
end if
if (iProcIndex == 0) then
write(stdout, "(A,F14.6,A,F14.6,A)") " Main RDM memory arrays consists of: ", &
real(memory_alloc, dp) / 1048576.0_dp, " MB per MPI process."
end if
! These parameters are set for the set up of the symmetry arrays, which
! are later used for the diagonalisation / rotation of the 1-RDMs.
if (tOpenShell) then
if (tFixLz) then
NoSymLabelCounts = 16 * ((2 * iMaxLz) + 1)
else
NoSymLabelCounts = 16
end if
else
if (tFixLz) then
NoSymLabelCounts = 8 * ((2 * iMaxLz) + 1)
else
NoSymLabelCounts = 8
end if
end if
if (RDMExcitLevel == 1 .or. RDMExcitLevel == 3 .or. tDiagRDM .or. tPrint1RDM .or. tDumpForcesInfo .or. tDipoles) then
! These arrays contain indexing systems to order the 1-RDM orbitals
! in terms of symmetry. This allows the diagonalisation of the RDMs
! to be done in symmetry blocks (a lot quicker/easier).
if (.not. allocated(SymLabelCounts2_rot)) allocate(SymLabelCounts2_rot(2, NoSymLabelCounts), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating SymLabelCounts2_rot array,')
call LogMemAlloc('SymLabelCounts2_rot', 2 * NoSymLabelCounts, 4, t_r, SymLabelCounts2_rotTag, ierr)
SymLabelCounts2_rot = 0
allocate(SymLabelList2_rot(NoOrbs), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating SymLabelList2_rot array,')
call LogMemAlloc('SymLabelList2_rot', NoOrbs, 4, t_r, SymLabelList2_rotTag, ierr)
SymLabelList2_rot = 0
allocate(SymLabelListInv_rot(NoOrbs), stat=ierr)
if (ierr /= 0) call stop_all(t_r, 'Problem allocating SymLabelListInv_rot array,')
call LogMemAlloc('SymLabelListInv_rot', NoOrbs, 4, t_r, SymLabelListInv_rotTag, ierr)
SymLabelListInv_rot = 0
! This routine actually sets up the symmetry labels for the 1-RDM.
call SetUpSymLabels_RDM()
end if
if (tPrint1RDMsFromSpinfree) then
if (tGUGA) then
call stop_all(t_r, "check if 'print-1rdms-from-spinfree' &
& works with GUGA")
end if
call read_spinfree_2rdm_files(rdm_definitions, two_rdm_main, two_rdm_spawn)
call print_1rdms_from_sf2rdms_wrapper(rdm_definitions, one_rdms, two_rdm_main)
! now clear these objects before the main simulation.
call clear_one_rdms(one_rdms)
call clear_rdm_list_t(two_rdm_main)
end if
if (tReadRDMs) then
if (tGUGA) then
call stop_all(t_r, "check if reading RDMs works with GUGA")
end if
if (RDMExcitLevel == 1) then
do irdm = 1, size(one_rdms)
call read_1rdm(rdm_definitions, one_rdms(irdm), irdm)
end do
else
call read_2rdm_popsfile(two_rdm_main, two_rdm_spawn)
if (print_2rdm_est) call calc_2rdm_estimates_wrapper(rdm_definitions, rdm_estimates, two_rdm_main, en_pert_main)
if (tPrint1RDMsFrom2RDMPops) then
call print_1rdms_from_2rdms_wrapper(rdm_definitions, one_rdms, two_rdm_main, tOpenShell)
end if
end if
if (any(tSinglePartPhase)) then
write (stdout, '("WARNING - Asking to read in the RDMs, but not varying shift from the beginning of &
&the calculation. All RDMs just read in will be zeroed, to prevent invalid averaging.")')
! Clear these objects, before the main simulation, since we
! haven't started averaging RDMs yet.
call clear_one_rdms(one_rdms)
call clear_rdm_list_t(two_rdm_main)
! Turn off tReadRDMs, since the read in RDMs aren't being
! used. Leaving it on affects some other stuff later.
tReadRDMs = .false.
end if
end if
! By default, if we're writing out a popsfile (and doing an RDM
! calculation), we also write out the unnormalised RDMs that can be
! read in when restarting a calculation. If the NORDMSTOREAD option
! is on, these wont be printed.
if (tPopsfile .and. (.not. tno_RDMs_to_read)) twrite_RDMs_to_read = .true.
if (iProcIndex == 0) write(stdout, '(1X,"RDM memory allocation complete.",/)')
nElRDM_Time%timer_name = 'nElRDMTime'
FinaliseRDMs_Time%timer_name = 'FinaliseRDMsTime'
RDMEnergy_Time%timer_name = 'RDMEnergyTime'
end subroutine init_rdms