calc_trial_states_lanczos Subroutine

public subroutine calc_trial_states_lanczos(space_in, nexcit, ndets_this_proc, trial_iluts, evecs_this_proc, evals, space_sizes, space_displs, reorder)


Type IntentOptional Attributes Name
type(subspace_in) :: space_in
integer, intent(in) :: nexcit
integer, intent(out) :: ndets_this_proc
integer(kind=n_int), intent(out) :: trial_iluts(0:,:)
real(kind=dp), intent(out), allocatable :: evecs_this_proc(:,:)
real(kind=dp), intent(out) :: evals(:)
integer(kind=MPIArg), intent(out) :: space_sizes(0:nProcessors-1)
integer(kind=MPIArg), intent(out) :: space_displs(0:nProcessors-1)
integer, intent(in), optional :: reorder(nexcit)


Source Code

    subroutine calc_trial_states_lanczos(space_in, nexcit, ndets_this_proc, trial_iluts, evecs_this_proc, evals, &
                                         space_sizes, space_displs, reorder)

        use bit_reps, only: decode_bit_det
        use CalcData, only: subspace_in, t_force_lanczos
        use DetBitOps, only: ilut_lt, ilut_gt
        use FciMCData, only: ilutHF, CurrentDets, TotWalkers
        use lanczos_wrapper, only: frsblk_wrapper
        use Parallel_neci, only: MPIScatterV, MPIGatherV, MPIBCast, MPIArg, iProcIndex
        use Parallel_neci, only: nProcessors
        use MPI_wrapper, only: root
        use sort_mod, only: sort
        use SystemData, only: nel, tAllSymSectors, tGUGA
        use sparse_arrays, only: calculate_sparse_ham_par, sparse_ham

        use hamiltonian_linalg, only: sparse_hamil_type, parallel_sparse_hamil_type

        use lanczos_general, only: LanczosCalcType, perform_lanczos, DestroyLanczosCalc

        type(subspace_in) :: space_in
        integer, intent(in) :: nexcit
        integer, intent(out) :: ndets_this_proc
        integer(n_int), intent(out) :: trial_iluts(0:, :)
        HElement_t(dp), allocatable, intent(out) :: evecs_this_proc(:, :)
        real(dp), intent(out) :: evals(:)
        integer(MPIArg), intent(out) :: space_sizes(0:nProcessors - 1), space_displs(0:nProcessors - 1)
        integer, optional, intent(in) :: reorder(nexcit)

        integer(n_int), allocatable :: ilut_list(:, :)
        integer, allocatable :: det_list(:, :)
        integer :: i, max_elem_ind(1), ierr
        integer :: temp_reorder(nexcit)
        integer(MPIArg) :: ndets_all_procs, ndets_this_proc_mpi
        integer(MPIArg) :: sndcnts(0:nProcessors - 1), displs(0:nProcessors - 1)
        integer(MPIArg) :: rcvcnts
        integer, allocatable :: evec_abs(:)
        HElement_t(dp), allocatable :: evecs(:, :), evecs_transpose(:, :)
        character(len=*), parameter :: this_routine = "calc_trial_states_lanczos"

        type(LanczosCalcType) :: lanczosCalc

        ndets_this_proc = 0
        trial_iluts = 0_n_int

        ! do some GUGA checks to abort non-supported trial wavefunctions
        if (tGUGA .and. (space_in%tCAS .or. space_in%tRAS .or. space_in%tFCI)) then
            call stop_all(this_routine, "non-supported trial space for GUGA!")
        end if

        write(stdout, *) " Initialising wavefunctions by the Lanczos algorithm"

        ! Choose the correct generating routine.
        if (space_in%tHF) call add_state_to_space(ilutHF, trial_iluts, ndets_this_proc)
        if (space_in%tPops) call generate_space_most_populated(space_in%npops, &
                                                     space_in%tApproxSpace, space_in%nApproxSpace, trial_iluts, ndets_this_proc, GLOBAL_RUN)
        if (space_in%tRead) call generate_space_from_file(space_in%read_filename, trial_iluts, ndets_this_proc)
        if (space_in%tDoubles) then
            if (tGUGA) then
                call generate_sing_doub_guga(trial_iluts, ndets_this_proc, .false.)
                call generate_sing_doub_determinants(trial_iluts, ndets_this_proc, .false.)
            end if
        end if
        if (space_in%tCAS) call generate_cas(space_in%occ_cas, space_in%virt_cas, trial_iluts, ndets_this_proc)
        if (space_in%tRAS) call generate_ras(space_in%ras, trial_iluts, ndets_this_proc)
        if (space_in%tOptimised) call generate_optimised_space(space_in%opt_data, space_in%tLimitSpace, &
                                                               trial_iluts, ndets_this_proc, space_in%max_size)
        if (space_in%tMP1) call generate_using_mp1_criterion(space_in%mp1_ndets, trial_iluts, ndets_this_proc)
        if (space_in%tFCI) then
            if (tAllSymSectors) then
                call gndts_all_sym_this_proc(trial_iluts, .true., ndets_this_proc)
                call generate_fci_core(trial_iluts, ndets_this_proc)
            end if
        end if

        if (.not. (space_in%tPops .or. space_in%tRead .or. space_in%tDoubles .or. space_in%tCAS .or. &
                   space_in%tRAS .or. space_in%tOptimised .or. space_in%tMP1 .or. space_in%tFCI)) then
            call stop_all(this_routine, "A space for the trial functions was not chosen.")
        end if

        ndets_this_proc_mpi = int(ndets_this_proc, MPIArg)
        call MPIAllGather(ndets_this_proc_mpi, space_sizes, ierr)
        ndets_all_procs = sum(space_sizes)

        if (ndets_all_procs < lanczos_space_size_cutoff .and. .not. t_force_lanczos) then
            write(stdout, *) " Aborting Lanczos and initialising trial states with direct diagonalisation"
            call calc_trial_states_direct(space_in, nexcit, ndets_this_proc, trial_iluts, evecs_this_proc, evals, &
                                          space_sizes, space_displs, reorder)
        end if

        if (ndets_all_procs < nexcit) call stop_all(this_routine, "The number of excited states that you have asked &
            &for is larger than the size of the trial space used to create the excited states. Since this &
            &routine generates trial states that are orthogonal, this is not possible.")

        space_displs(0) = 0_MPIArg
        do i = 1, nProcessors - 1
            space_displs(i) = space_displs(i - 1) + space_sizes(i - 1)
        end do

        call sort(trial_iluts(:, 1:ndets_this_proc), ilut_lt, ilut_gt)

        if (iProcIndex == root) then
            allocate(ilut_list(0:NIfTot, ndets_all_procs))
            ! On these other processes ilut_list and evecs_transpose are not
            ! needed, but we need them to be allocated for the MPI wrapper
            ! function to work, so just allocate them to be small.
            allocate(ilut_list(1, 1))
            allocate(evecs_transpose(1, 1))
        end if

        call MPIGatherV(trial_iluts(:, 1:space_sizes(iProcIndex)), ilut_list, &
                        space_sizes, space_displs, ierr)

        call calculate_sparse_ham_par(space_sizes, trial_iluts, .true.)

        call MPIBarrier(ierr)

        if (iProcIndex == root) then
            allocate(det_list(nel, ndets_all_procs))

            do i = 1, ndets_all_procs
                call decode_bit_det(det_list(:, i), ilut_list(:, i))
            end do


            allocate(evecs(ndets_all_procs, nexcit), stat=ierr)
            if (ierr /= 0) call stop_all(this_routine, "Error allocating eigenvectors array.")
            evecs = 0.0_dp

            allocate(evec_abs(ndets_all_procs), stat=ierr)
            if (ierr /= 0) call stop_all(this_routine, "Error allocating evec_abs array.")
            evec_abs = 0
        end if

        ! Perform the Lanczos procedure in parallel.
        if (t_non_hermitian_2_body) then
            call stop_all(this_routine, &
                          "perform_lanczos not implemented for non-hermitian Hamiltonians!")
        end if

        call perform_lanczos(lanczosCalc, det_list, nexcit, parallel_sparse_hamil_type, .true.)

        if (iProcIndex == root) then
            evals = lanczosCalc%eigenvalues(1:nexcit)
            evecs = lanczosCalc%eigenvectors(1:lanczosCalc%super%space_size, 1:nexcit)

            ! For consistency between compilers, enforce a rule for the sign of
            ! the eigenvector. To do this, make sure that the largest component
            ! of each vector is positive. The largest component is found using
            ! maxloc. If there are multiple determinants with the same weight
            ! then the FORTRAN standard says that the first such component will
            ! be used, so this will hopefully be consistent across compilers.
            ! To avoid numerical errors bwteen compilers for such elements, we
            ! also round the array components to integers. Because evecs will
            ! be normalised, also multiply by 100000 before rounding.
            do i = 1, nexcit
                ! First find the maximum element.
                evec_abs = nint(100000.0_dp * abs(evecs(:, i)))
                max_elem_ind = maxloc(evec_abs)
                if (abs(evecs(max_elem_ind(1), i)) < 0.0_dp) then
                    evecs(:, i) = -evecs(:, i)
                end if
            end do


            ! Reorder the trial states, if the user has asked for this to be done.
            if (present(reorder)) then
                temp_reorder = reorder
                call sort(temp_reorder, evals)
                temp_reorder = reorder
                call sort(temp_reorder, evecs)
            end if

            ! Unfortunately to perform the MPIScatterV call we need the transpose
            ! of the eigenvector array.
            safe_malloc(evecs_transpose, (nexcit, ndets_all_procs))
            evecs_transpose = transpose(evecs)
        end if

        call MPIBCast(evals, size(evals), root)

        ndets_this_proc_mpi = space_sizes(iProcIndex)
        ! The number of elements to send and receive in the MPI call, and the
        ! displacements.
        sndcnts = space_sizes * int(nexcit, MPIArg)
        rcvcnts = ndets_this_proc_mpi * int(nexcit, MPIArg)
        displs = space_displs * int(nexcit, MPIArg)

        ! Send the components to the correct processors using the following
        ! array as temporary space.
        allocate(evecs_this_proc(nexcit, ndets_this_proc))
        call MPIScatterV(evecs_transpose, sndcnts, displs, evecs_this_proc, rcvcnts, ierr)
        if (ierr /= 0) call stop_all(this_routine, "Error in MPIScatterV call.")

        ! Clean up.
        if (iProcIndex == root) deallocate(evecs)
        call DestroyLanczosCalc(lanczosCalc)

    end subroutine calc_trial_states_lanczos