subroutine calc_trial_states_lanczos(space_in, nexcit, ndets_this_proc, trial_iluts, evecs_this_proc, evals, &
space_sizes, space_displs, reorder)
use bit_reps, only: decode_bit_det
use CalcData, only: subspace_in, t_force_lanczos
use DetBitOps, only: ilut_lt, ilut_gt
use FciMCData, only: ilutHF, CurrentDets, TotWalkers
use lanczos_wrapper, only: frsblk_wrapper
use Parallel_neci, only: MPIScatterV, MPIGatherV, MPIBCast, MPIAllGather, &
MPIBarrier, MPIArg, iProcIndex, nProcessors
use MPI_wrapper, only: root
use sort_mod, only: sort
use SystemData, only: nel, tAllSymSectors, tGUGA, t_non_hermitian_2_body
use sparse_arrays, only: calculate_sparse_ham_par, sparse_ham
use hamiltonian_linalg, only: sparse_hamil_type, parallel_sparse_hamil_type
use lanczos_general, only: LanczosCalcType, perform_lanczos, DestroyLanczosCalc
type(subspace_in) :: space_in
integer, intent(in) :: nexcit
integer, intent(out) :: ndets_this_proc
integer(n_int), intent(out) :: trial_iluts(0:, :)
HElement_t(dp), allocatable, intent(out) :: evecs_this_proc(:, :)
real(dp), intent(out) :: evals(:)
integer(MPIArg), intent(out) :: space_sizes(0:nProcessors - 1), space_displs(0:nProcessors - 1)
integer, optional, intent(in) :: reorder(nexcit)
integer(n_int), allocatable :: ilut_list(:, :)
integer, allocatable :: det_list(:, :)
integer :: i, max_elem_ind(1), ierr
integer :: temp_reorder(nexcit)
integer(MPIArg) :: ndets_all_procs, ndets_this_proc_mpi
integer(MPIArg) :: sndcnts(0:nProcessors - 1), displs(0:nProcessors - 1)
integer(MPIArg) :: rcvcnts
integer, allocatable :: evec_abs(:)
HElement_t(dp), allocatable :: evecs(:, :), evecs_transpose(:, :)
character(len=*), parameter :: this_routine = "calc_trial_states_lanczos"
type(LanczosCalcType) :: lanczosCalc
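! Outline: generate the requested trial space on each processor, gather it to
! the root processor, build the Hamiltonian in that space as a distributed
! sparse matrix, obtain the lowest nexcit eigenpairs with the Lanczos
! algorithm, and finally scatter the eigenvector components back to the
! processors which hold the corresponding determinants. For small trial
! spaces the routine falls back to direct diagonalisation (unless Lanczos
! is forced).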
ndets_this_proc = 0
trial_iluts = 0_n_int
! Abort for trial-space options that are not supported with GUGA.
if (tGUGA .and. (space_in%tCAS .or. space_in%tRAS .or. space_in%tFCI)) then
call stop_all(this_routine, "non-supported trial space for GUGA!")
end if
write(stdout, *) " Initialising wavefunctions by the Lanczos algorithm"
! Choose the correct generating routine.
if (space_in%tHF) call add_state_to_space(ilutHF, trial_iluts, ndets_this_proc)
if (space_in%tPops) call generate_space_most_populated(space_in%npops, &
space_in%tApproxSpace, space_in%nApproxSpace, trial_iluts, ndets_this_proc, GLOBAL_RUN)
if (space_in%tRead) call generate_space_from_file(space_in%read_filename, trial_iluts, ndets_this_proc)
if (space_in%tDoubles) then
if (tGUGA) then
call generate_sing_doub_guga(trial_iluts, ndets_this_proc, .false.)
else
call generate_sing_doub_determinants(trial_iluts, ndets_this_proc, .false.)
end if
end if
if (space_in%tCAS) call generate_cas(space_in%occ_cas, space_in%virt_cas, trial_iluts, ndets_this_proc)
if (space_in%tRAS) call generate_ras(space_in%ras, trial_iluts, ndets_this_proc)
if (space_in%tOptimised) call generate_optimised_space(space_in%opt_data, space_in%tLimitSpace, &
trial_iluts, ndets_this_proc, space_in%max_size)
if (space_in%tMP1) call generate_using_mp1_criterion(space_in%mp1_ndets, trial_iluts, ndets_this_proc)
if (space_in%tFCI) then
if (tAllSymSectors) then
call gndts_all_sym_this_proc(trial_iluts, .true., ndets_this_proc)
else
call generate_fci_core(trial_iluts, ndets_this_proc)
end if
end if
if (.not. (space_in%tPops .or. space_in%tRead .or. space_in%tDoubles .or. space_in%tCAS .or. &
space_in%tRAS .or. space_in%tOptimised .or. space_in%tMP1 .or. space_in%tFCI)) then
call stop_all(this_routine, "A space for the trial functions was not chosen.")
end if
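! Gather the number of trial-space determinants found on each processor, and
! hence the total size of the trial space.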
ndets_this_proc_mpi = int(ndets_this_proc, MPIArg)
call MPIAllGather(ndets_this_proc_mpi, space_sizes, ierr)
ndets_all_procs = sum(space_sizes)
if (ndets_all_procs < lanczos_space_size_cutoff .and. .not. t_force_lanczos) then
write(stdout, *) " Aborting Lanczos and initialising trial states with direct diagonalisation"
call calc_trial_states_direct(space_in, nexcit, ndets_this_proc, trial_iluts, evecs_this_proc, evals, &
space_sizes, space_displs, reorder)
return
end if
if (ndets_all_procs < nexcit) call stop_all(this_routine, "The number of excited states that you have asked &
&for is larger than the size of the trial space used to create the excited states. Since this &
&routine generates trial states that are orthogonal, this is not possible.")
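! Compute the offset of each processor's block of determinants within the
! full, gathered trial space.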
space_displs(0) = 0_MPIArg
do i = 1, nProcessors - 1
space_displs(i) = space_displs(i - 1) + space_sizes(i - 1)
end do
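! Sort this processor's trial determinants into the order defined by
! ilut_lt/ilut_gt before gathering them on the root processor.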
call sort(trial_iluts(:, 1:ndets_this_proc), ilut_lt, ilut_gt)
if (iProcIndex == root) then
allocate(ilut_list(0:NIfTot, ndets_all_procs))
else
! On these other processes ilut_list and evecs_transpose are not
! needed, but we need them to be allocated for the MPI wrapper
! function to work, so just allocate them to be small.
allocate(ilut_list(1, 1))
allocate(evecs_transpose(1, 1))
end if
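! Gather the trial determinants from all processors into ilut_list on the
! root processor.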
call MPIGatherV(trial_iluts(:, 1:space_sizes(iProcIndex)), ilut_list, &
space_sizes, space_displs, ierr)
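! Build the Hamiltonian projected into the trial space as a sparse matrix
! distributed across the processors.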
call calculate_sparse_ham_par(space_sizes, trial_iluts, .true.)
call MPIBarrier(ierr)
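! On the root processor, decode the gathered bit-string representations into
! explicit determinant (occupied-orbital) form for the Lanczos routine, and
! allocate the arrays which will hold the resulting eigenvectors.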
if (iProcIndex == root) then
allocate(det_list(nel, ndets_all_procs))
do i = 1, ndets_all_procs
call decode_bit_det(det_list(:, i), ilut_list(:, i))
end do
deallocate(ilut_list)
allocate(evecs(ndets_all_procs, nexcit), stat=ierr)
if (ierr /= 0) call stop_all(this_routine, "Error allocating eigenvectors array.")
evecs = 0.0_dp
allocate(evec_abs(ndets_all_procs), stat=ierr)
if (ierr /= 0) call stop_all(this_routine, "Error allocating evec_abs array.")
evec_abs = 0
end if
! Perform the Lanczos procedure in parallel.
if (t_non_hermitian_2_body) then
call stop_all(this_routine, &
"perform_lanczos not implemented for non-hermitian Hamiltonians!")
end if
call perform_lanczos(lanczosCalc, det_list, nexcit, parallel_sparse_hamil_type, .true.)
if (iProcIndex == root) then
evals = lanczosCalc%eigenvalues(1:nexcit)
evecs = lanczosCalc%eigenvectors(1:lanczosCalc%super%space_size, 1:nexcit)
! For consistency between compilers, enforce a rule for the sign of
! each eigenvector: make sure that the largest-magnitude component of
! each vector is positive. The largest component is found using
! maxloc. If there are multiple determinants with the same weight
! then the Fortran standard says that the first such component will
! be used, so this should be consistent across compilers.
! To avoid numerical differences between compilers for such elements,
! the components are first rounded to integers. Because evecs is
! normalised, each component is at most 1 in magnitude, so multiply
! by 100000 before rounding to retain precision.
do i = 1, nexcit
! Find the index of the largest-magnitude element.
evec_abs = nint(100000.0_dp * abs(evecs(:, i)))
max_elem_ind = maxloc(evec_abs)
! If that element is negative, flip the sign of the whole vector.
! (The real part is compared so that this also compiles for complex
! HElement_t builds.)
if (real(evecs(max_elem_ind(1), i), dp) < 0.0_dp) then
evecs(:, i) = -evecs(:, i)
end if
end do
deallocate(det_list)
deallocate(evec_abs)
! Reorder the trial states, if the user has asked for this to be done.
if (present(reorder)) then
temp_reorder = reorder
call sort(temp_reorder, evals)
temp_reorder = reorder
call sort(temp_reorder, evecs)
end if
! Unfortunately, to perform the MPIScatterV call we need the transpose
! of the eigenvector array.
safe_malloc(evecs_transpose, (nexcit, ndets_all_procs))
evecs_transpose = transpose(evecs)
else
safe_free(ilut_list)
end if
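! Broadcast the eigenvalues from the root processor to all processors.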
call MPIBCast(evals, size(evals), root)
ndets_this_proc_mpi = space_sizes(iProcIndex)
! The number of elements to send and receive in the MPI call, and the
! displacements.
sndcnts = space_sizes * int(nexcit, MPIArg)
rcvcnts = ndets_this_proc_mpi * int(nexcit, MPIArg)
displs = space_displs * int(nexcit, MPIArg)
! Allocate the output array and scatter the eigenvector components to
! the processors which hold the corresponding determinants.
allocate(evecs_this_proc(nexcit, ndets_this_proc))
call MPIScatterV(evecs_transpose, sndcnts, displs, evecs_this_proc, rcvcnts, ierr)
if (ierr /= 0) call stop_all(this_routine, "Error in MPIScatterV call.")
! Clean up.
if (iProcIndex == root) deallocate(evecs)
safe_free(evecs_transpose)
call DestroyLanczosCalc(lanczosCalc)
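! Free the sparse Hamiltonian built in the trial space.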
safe_free(sparse_ham)
end subroutine calc_trial_states_lanczos