subroutine calc_final_hamil_elem(lanc_vecs, full_vec, proj_hamil, counts, disps)
real(dp), intent(inout) :: lanc_vecs(:, :)
real(dp), intent(inout) :: full_vec(:)
real(dp), intent(inout) :: proj_hamil(:, :)
integer(MPIArg), intent(in) :: counts(0:nProcessors - 1), disps(0:nProcessors - 1)
integer :: i, j, final_elem
real(dp) :: temp, overlap, tot_overlap
final_elem = size(proj_hamil, 1)
call MPIAllGatherV(lanc_vecs(:, final_elem), full_vec, counts, disps)
overlap = 0.0_dp
do i = 1, counts(iProcIndex)
! If we denote the final Lanczos vector as V, then at the end of the following
! do loop, temp will hold (H*V)_i. That is, the element of H*V corresponding to
! the i'th basis vector, which we're currently on (where H is the Hamiltonian matrix).
temp = 0.0_dp
do j = 1, sparse_ham(i)%num_elements
temp = temp + sparse_ham(i)%elements(j) * full_vec(sparse_ham(i)%positions(j))
end do
overlap = overlap + temp * lanc_vecs(i, final_elem)
end do
call MPISumAll(overlap, tot_overlap)
proj_hamil(final_elem, final_elem) = tot_overlap
end subroutine calc_final_hamil_elem