subroutine calculate_sparse_ham_par(num_states, ilut_list, tPrintInfo)
integer(MPIArg), intent(in) :: num_states(0:nProcessors - 1)
integer(n_int), intent(in) :: ilut_list(0:NIfTot, num_states(iProcIndex))
logical, intent(in) :: tPrintInfo
integer(MPIArg) :: disps(0:nProcessors - 1)
integer :: i, j, row_size, counter, num_states_tot, ierr, bytes_required
integer :: nI(nel), nJ(nel)
integer(n_int), allocatable, dimension(:, :) :: temp_store
integer(TagIntType) :: TempStoreTag, HRTag, SDTag
HElement_t(dp), allocatable, dimension(:) :: hamiltonian_row
character(len=*), parameter :: t_r = "calculate_sparse_ham_par"
integer :: pos, nexcits
integer(n_int) :: ilutG(0:nifguga)
integer(n_int), pointer :: excitations(:, :)
type(ExcitationInformation_t) :: excitInfo
type(CSF_Info_t) :: csf_i, csf_j
num_states_tot = int(sum(num_states))
disps(0) = 0
do i = 1, nProcessors - 1
disps(i) = disps(i - 1) + num_states(i - 1)
end do
safe_realloc(sparse_ham, (num_states(iProcIndex)))
safe_realloc(SparseHamilTags, (2, num_states(iProcIndex)))
safe_realloc(hamiltonian_row, (num_states_tot))
call LogMemAlloc('hamiltonian_row', num_states_tot, 8, t_r, HRTag)
safe_realloc(hamil_diag, (num_states(iProcIndex)))
call LogMemAlloc('hamil_diag', int(num_states(iProcIndex)), 8, t_r, HDiagTag)
safe_realloc(temp_store, (0:NIfTot, num_states_tot))
call LogMemAlloc('temp_store', num_states_tot * (NIfTot + 1), 8, t_r, TempStoreTag)
! Stick together the determinants from all processors, on all processors.
call MPIAllGatherV(ilut_list(:, 1:num_states(iProcIndex)), temp_store, num_states, disps)
! Loop over all determinants on this processor.
do i = 1, num_states(iProcIndex)
call decode_bit_det(nI, ilut_list(:, i))
row_size = 0
hamiltonian_row = 0.0_dp
! Loop over all determinants on all processors.
if (tGUGA) csf_i = CSF_Info_t(ilut_list(0:nifd, i))
do j = 1, num_states_tot
call decode_bit_det(nJ, temp_store(:, j))
if (tGUGA) csf_j = CSF_Info_t(temp_store(:, j))
! If on the diagonal of the Hamiltonian.
if (DetBitEq(ilut_list(:, i), temp_store(:, j), nifd)) then
if (tHPHF) then
hamiltonian_row(j) = hphf_diag_helement(nI, ilut_list(:, i))
else if (tGUGA) then
hamiltonian_row(j) = calcDiagMatEleGuga_nI(nI)
else
hamiltonian_row(j) = get_helement(nI, nJ, 0)
end if
hamil_diag(i) = hamiltonian_row(j)
! Always include the diagonal elements.
row_size = row_size + 1
else
if (tHPHF) then
hamiltonian_row(j) = hphf_off_diag_helement(nI, nJ, ilut_list(:, i), temp_store(:, j))
else if (tGUGA) then
call calc_guga_matrix_element(&
ilut_list(:, i), csf_i, temp_store(:, j), &
csf_j, excitInfo, hamiltonian_row(j), .true.)
#ifdef CMPLX_
hamiltonian_row(j) = conjg(hamiltonian_row(j))
#endif
! call calc_guga_matrix_element(temp_store(:,j), ilut_list(:,i), &
! excitInfo, hamiltonian_row(j), .true., 2)
else
hamiltonian_row(j) = get_helement(nI, nJ, ilut_list(:, i), temp_store(:, j))
end if
if (abs(hamiltonian_row(j)) > 0.0_dp) row_size = row_size + 1
end if
end do
if (tPrintInfo) then
if (i == 1) then
bytes_required = row_size * (8 + sizeof_int)
write(stdout, '(1x,a43)') "About to allocate first row of Hamiltonian."
write(stdout, '(1x,a40,1x,i8)') "The memory (bytes) required for this is:", bytes_required
write(stdout, '(1x,a71,1x,i7)') "The total number of determinants (and hence rows) on this processor is:", &
num_states(iProcIndex)
write(stdout, '(1x,a58,1x,i7)') "The total number of determinants across all processors is:", num_states_tot
write(stdout, '(1x,a77,1x,i7)') "It is therefore expected that the total memory (MB) required will be roughly:", &
num_states_tot * bytes_required / 1000000
else if (mod(i, 1000) == 0) then
write(stdout, '(1x,a23,1x,i7)') "Finished computing row:", i
end if
end if
! Now we know the number of non-zero elements in this row of the Hamiltonian, so allocate it.
call allocate_sparse_ham_row(sparse_ham, i, row_size, "sparse_ham", SparseHamilTags(:, i))
sparse_ham(i)%elements = 0.0_dp
sparse_ham(i)%positions = 0
sparse_ham(i)%num_elements = row_size
counter = 1
do j = 1, num_states_tot
! If non-zero or a diagonal element.
if (abs(hamiltonian_row(j)) > 0.0_dp .or. (j == i + disps(iProcIndex))) then
sparse_ham(i)%positions(counter) = j
sparse_ham(i)%elements(counter) = hamiltonian_row(j)
counter = counter + 1
end if
if (counter == row_size + 1) exit
end do
end do
call MPIBarrier(ierr)
deallocate(temp_store, stat=ierr)
call LogMemDealloc(t_r, TempStoreTag, ierr)
deallocate(hamiltonian_row, stat=ierr)
call LogMemDealloc(t_r, HRTag, ierr)
end subroutine calculate_sparse_ham_par