calculate_sparse_ham_par Subroutine

public subroutine calculate_sparse_ham_par(num_states, ilut_list, tPrintInfo)

Arguments

Type IntentOptional Attributes Name
integer(kind=MPIArg), intent(in) :: num_states(0:nProcessors-1)
integer(kind=n_int), intent(in) :: ilut_list(0:NIfTot,num_states(iProcIndex))
logical, intent(in) :: tPrintInfo

Contents


Source Code

    subroutine calculate_sparse_ham_par(num_states, ilut_list, tPrintInfo)

        integer(MPIArg), intent(in) :: num_states(0:nProcessors - 1)
        integer(n_int), intent(in) :: ilut_list(0:NIfTot, num_states(iProcIndex))
        logical, intent(in) :: tPrintInfo
        integer(MPIArg) :: disps(0:nProcessors - 1)
        integer :: i, j, row_size, counter, num_states_tot, ierr, bytes_required
        integer :: nI(nel), nJ(nel)
        integer(n_int), allocatable, dimension(:, :) :: temp_store
        integer(TagIntType) :: TempStoreTag, HRTag, SDTag
        HElement_t(dp), allocatable, dimension(:) :: hamiltonian_row
        character(len=*), parameter :: t_r = "calculate_sparse_ham_par"

        integer :: pos, nexcits
        integer(n_int) :: ilutG(0:nifguga)
        integer(n_int), pointer :: excitations(:, :)
        type(ExcitationInformation_t) :: excitInfo
        type(CSF_Info_t) :: csf_i, csf_j

        num_states_tot = int(sum(num_states))
        disps(0) = 0
        do i = 1, nProcessors - 1
            disps(i) = disps(i - 1) + num_states(i - 1)
        end do

        safe_realloc(sparse_ham, (num_states(iProcIndex)))
        safe_realloc(SparseHamilTags, (2, num_states(iProcIndex)))
        safe_realloc(hamiltonian_row, (num_states_tot))
        call LogMemAlloc('hamiltonian_row', num_states_tot, 8, t_r, HRTag)
        safe_realloc(hamil_diag, (num_states(iProcIndex)))
        call LogMemAlloc('hamil_diag', int(num_states(iProcIndex)), 8, t_r, HDiagTag)
        safe_realloc(temp_store, (0:NIfTot, num_states_tot))
        call LogMemAlloc('temp_store', num_states_tot * (NIfTot + 1), 8, t_r, TempStoreTag)

        ! Stick together the determinants from all processors, on all processors.
        call MPIAllGatherV(ilut_list(:, 1:num_states(iProcIndex)), temp_store, num_states, disps)

        ! Loop over all determinants on this processor.
        do i = 1, num_states(iProcIndex)

            call decode_bit_det(nI, ilut_list(:, i))

            row_size = 0
            hamiltonian_row = 0.0_dp
            ! Loop over all determinants on all processors.

            if (tGUGA) csf_i = CSF_Info_t(ilut_list(0:nifd, i))

            do j = 1, num_states_tot

                call decode_bit_det(nJ, temp_store(:, j))
                if (tGUGA) csf_j = CSF_Info_t(temp_store(:, j))

                ! If on the diagonal of the Hamiltonian.
                if (DetBitEq(ilut_list(:, i), temp_store(:, j), nifd)) then
                    if (tHPHF) then
                        hamiltonian_row(j) = hphf_diag_helement(nI, ilut_list(:, i))
                    else if (tGUGA) then
                        hamiltonian_row(j) = calcDiagMatEleGuga_nI(nI)
                    else
                        hamiltonian_row(j) = get_helement(nI, nJ, 0)
                    end if
                    hamil_diag(i) = hamiltonian_row(j)
                    ! Always include the diagonal elements.
                    row_size = row_size + 1
                else
                    if (tHPHF) then
                        hamiltonian_row(j) = hphf_off_diag_helement(nI, nJ, ilut_list(:, i), temp_store(:, j))
                    else if (tGUGA) then
                        call calc_guga_matrix_element(&
                            ilut_list(:, i), csf_i, temp_store(:, j), &
                            csf_j, excitInfo, hamiltonian_row(j), .true.)
#ifdef CMPLX_
                        hamiltonian_row(j) = conjg(hamiltonian_row(j))
#endif
                        ! call calc_guga_matrix_element(temp_store(:,j), ilut_list(:,i), &
                        !     excitInfo, hamiltonian_row(j), .true., 2)
                    else
                        hamiltonian_row(j) = get_helement(nI, nJ, ilut_list(:, i), temp_store(:, j))
                    end if
                    if (abs(hamiltonian_row(j)) > 0.0_dp) row_size = row_size + 1
                end if
            end do

            if (tPrintInfo) then
                if (i == 1) then
                    bytes_required = row_size * (8 + sizeof_int)
                    write(stdout, '(1x,a43)') "About to allocate first row of Hamiltonian."
                    write(stdout, '(1x,a40,1x,i8)') "The memory (bytes) required for this is:", bytes_required
                    write(stdout, '(1x,a71,1x,i7)') "The total number of determinants (and hence rows) on this processor is:", &
                        num_states(iProcIndex)
                    write(stdout, '(1x,a58,1x,i7)') "The total number of determinants across all processors is:", num_states_tot
                    write(stdout, '(1x,a77,1x,i7)') "It is therefore expected that the total memory (MB) required will be roughly:", &
                        num_states_tot * bytes_required / 1000000
                else if (mod(i, 1000) == 0) then
                    write(stdout, '(1x,a23,1x,i7)') "Finished computing row:", i
                end if
            end if

            ! Now we know the number of non-zero elements in this row of the Hamiltonian, so allocate it.
            call allocate_sparse_ham_row(sparse_ham, i, row_size, "sparse_ham", SparseHamilTags(:, i))

            sparse_ham(i)%elements = 0.0_dp
            sparse_ham(i)%positions = 0
            sparse_ham(i)%num_elements = row_size

            counter = 1
            do j = 1, num_states_tot
                ! If non-zero or a diagonal element.
                if (abs(hamiltonian_row(j)) > 0.0_dp .or. (j == i + disps(iProcIndex))) then
                    sparse_ham(i)%positions(counter) = j
                    sparse_ham(i)%elements(counter) = hamiltonian_row(j)
                    counter = counter + 1
                end if
                if (counter == row_size + 1) exit
            end do

        end do

        call MPIBarrier(ierr)

        deallocate(temp_store, stat=ierr)
        call LogMemDealloc(t_r, TempStoreTag, ierr)
        deallocate(hamiltonian_row, stat=ierr)
        call LogMemDealloc(t_r, HRTag, ierr)

    end subroutine calculate_sparse_ham_par