semi_stoch_gen.F90 Source File

Source Code

#include "macros.h"

module semi_stoch_gen
    use CalcData, only: opt_space_data, ss_space_in, &
        tStaticCore, tTruncInitiator, tDetermProjApproxHamil, &
        t_choose_trial_state, semistoch_shift_iter, tReadPops, &
        tStartCAS, t_fast_pops_core, tTrialInit, &
        trial_excit_choice, t_core_inits, tUseRealCoeffs
    use constants, only: dp, int64, n_int, stdout, stderr, &
        maxExcit, lenof_sign, size_n_int, sizeof_int, bits_n_int, eps, &
        inum_runs
    use DetBitOps, only: DetBitLT, EncodeBitDet, IsAllowedHPHF, spin_sym_ilut, ilut_lt, ilut_gt
    use DeterminantData, only: write_det
    use FciMCData, only: HFDet, ilutHF, t_global_core_space, &
        MaxWalkersPart, NoInitDets, AllNoInitDets, SemiStoch_Davidson_Time, &
        SemiStoch_nonhermit_Time, SpawnedParts, var_size_this_proc, &
        temp_var_space, TotWalkers, TotWalkersOld, &
        iter_data_fciqmc, tFillingStochRdmOnFly, SemiStoch_Hamil_Time, &
        tStartCoreGroundState, SemiStoch_Init_Time, &
        var_sizes, var_displs, &
        var_space_size, var_space_size_int, var_space, CurrentDets
    use LoggingData, only: tWriteCore, tRDMonFly, t_print_core_info, &
        t_print_core_hamil, t_print_core_vec
    use MPI_wrapper, only: root
    use MemoryManager, only: TagIntType, LogMemAlloc, LogMemDealloc
    use Parallel_neci, only: MPIAlltoAll, MPIAlltoAllv, MPISum, &
        iProcIndex, nProcessors, MPIArg, MPIAllGatherV, MPIAllGather, &
        MPIScatter, MPIScatterV, MPIBarrier
    use SymExcit2, only: gensymexcitit2par_worker
    use SymExcit3, only: GenExcitations3
    use SymExcit4, only: GenExcitations4, ExcitGenSessionType
    use SystemData, only: nel, G1, tUseBrillouin, nBasis, &
        BRR, nBasisMax, tSpn, LMS, tParity, SymRestrict, tSymSet, &
        STOT, tAllSymSectors, tReltvy, nOccAlpha, nOccBeta, tHub, &
        tUEG, tReal, tNoSingExcits, tKPntSym, BasisFn, &
        tGUGA, t_mol_3_body, &
        t_non_hermitian_2_body, tHPHF
    use bit_rep_data, only: NIfD, NIfTot, nIfGUGA, flag_static_init, &
        extract_sign, flag_deterministic
    use bit_reps, only: decode_bit_det, set_flag, clr_flag_multi, encode_sign, &
        get_initiator_flag_by_run
    use core_space_util, only: core_space_t, cs_replicas, deallocate_sparse_ham
    use davidson_neci, only: DavidsonCalcType, perform_davidson, DestroyDavidsonCalc
    use determinants, only: get_helement
    use enumerate_excitations, only: generate_connected_space
    use fast_determ_hamil, only: initialise_shared_rht, &
        calc_approx_hamil_sparse_hphf, calc_determ_hamil_sparse_hphf, &
        calc_determ_hamil_sparse, &
        calculate_sparse_hamiltonian_non_hermitian, var_ht, &
        calc_determ_hamil_opt_hphf, &
        calc_determ_hamil_opt, calculate_sparse_hamiltonian
    use gndts_mod, only: gndts, gndts_all_sym_this_proc
    use guga_bitRepOps, only: convert_ilut_toGUGA, convert_ilut_toNECI, &
        isProperCSF_flexible, write_guga_list, CSF_Info_t
    use guga_data, only: tGUGACore
    use guga_excitations, only: actHamiltonian
    use hamiltonian_linalg, only: sparse_hamil_type
    use lattice_models_utils, only: make_ilutJ
    use load_balance, only: adjust_load_balance
    use load_balance_calcnodes, only: DetermineDetNode, tLoadBalanceBlocks
    use matrix_util, only: print_vec
    use ras, only: ras_class_data, initialise_ras_space, tot_nelec, &
        tot_norbs, find_ras_size, generate_entire_ras_space, &
        ras_parameters
    use searching, only: remove_repeated_states
    use semi_stoch_procs, only: &
        end_semistoch, subspace_in, &
        add_core_states_currentdet_hash, sort_space_by_proc, &
        proc_most_populated_states, core_space_weight, &
        print_basis, print_hamiltonian, global_run, &
        core_run, store_whole_core_space, write_core_space, &
        diagonalize_core_non_hermitian, &
        diagonalize_core, reinit_current_trial_amps, fill_in_diag_helements, &
        return_mp1_amp_and_mp2_energy, remove_high_energy_orbs, &
        start_walkers_from_core_ground_nonhermit, &
        generate_core_connections, start_walkers_from_core_ground, &
        return_largest_indices, return_proc_share

    use shared_rhash, only: initialise_shared_rht
    use sort_mod, only: sort
    use sparse_arrays, only: HDiagTag, SparseHamilTags, sparse_ham, hamil_diag
    use sym_general_mod, only: IsSymAllowedExcitMat
    use sym_mod, only: getsym
    use timing_neci, only: timer, set_timer, halt_timer, get_total_time
    use util_mod, only: binary_search_real, get_free_unit, near_zero, &
        operator(.div.), warning_neci, neci_flush
    use error_handling_neci, only: stop_all

    better_implicit_none
    private
    public :: init_semi_stochastic, generate_space_most_populated, &
        generate_using_mp1_criterion, &
        reset_core_space, add_state_to_space, &
        generate_ras, generate_cas, generate_fci_core, &
        generate_space_from_file, generate_sing_doub_guga, &
        generate_sing_doub_determinants, generate_optimised_space, &
        end_semistoch, enumerate_sing_doub_kpnt, &
        write_most_pop_core_at_end, refresh_semistochastic_space

contains

    subroutine init_semi_stochastic(core_in, tStartedFromCoreGround)

        ! Initialise the semi-stochastic information. This includes enumerating a list of all
        ! determinants or CSFs in the deterministic space and calculating and storing the resulting
        ! Hamiltonian matrix elements. The lists which will store the walker amplitude vectors in
        ! the deterministic space are also allocated.
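        ! Illustrative usage sketch (the variable names here are hypothetical; in
        ! practice core_in is filled from the input file by the calling code):
        !     type(subspace_in) :: core_in
        !     logical :: t_started_from_core_gs
        !     core_in%tDoubles = .true.   ! core space = HF plus its singles/doubles
        !     call init_semi_stochastic(core_in, t_started_from_core_gs)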

        type(subspace_in) :: core_in
        logical, intent(out) :: tStartedFromCoreGround

        integer :: i, ierr, run, num_core_runs
        integer :: nI(nel)
        integer(MPIArg) :: mpi_temp
        character(len=*), parameter :: t_r = "init_semi_stochastic"
#ifndef CMPLX_
        real(dp), allocatable :: e_values(:)
        HElement_t(dp), allocatable :: e_vectors(:, :), gs_vector(:)
        real(dp) :: gs_energy
#endif
        ! If we are load balancing, this gets disabled once semi stochastic
        ! has been initialised. Therefore we should do a last-gasp load
        ! adjustment at this point.
        if (tLoadBalanceBlocks .and. .not. tFillingStochRDMOnFly) then
            call adjust_load_balance(iter_data_fciqmc)
        end if

        call MPIBarrier(ierr, tTimeIn=.false.)

        call set_timer(SemiStoch_Init_Time)

        if (t_global_core_space) then
            num_core_runs = 1
        else
            num_core_runs = inum_runs
        end if
        ! Allocate the corespace replicas
        allocate(cs_replicas(num_core_runs))

        write(stdout, '(/,12("="),1x,a30,1x,12("="))') "Semi-stochastic initialisation"
        call neci_flush(stdout)

        do run = 1, size(cs_replicas)
            associate(rep => cs_replicas(run))
                allocate(rep%determ_sizes(0:nProcessors - 1))
                allocate(rep%determ_displs(0:nProcessors - 1))
                allocate(rep%determ_last(0:nProcessors - 1))
                call rep%associate_run(t_global_core_space, run)
                rep%determ_sizes = 0_MPIArg
                rep%determ_last = 0_MPIArg

                if (.not. (tStartCAS .or. core_in%tPops .or. core_in%tDoubles .or. core_in%tCAS .or. core_in%tRAS .or. &
                           core_in%tOptimised .or. core_in%tLowE .or. core_in%tRead .or. core_in%tMP1 .or. &
                           core_in%tFCI .or. core_in%tHeisenbergFCI .or. core_in%tHF .or. &
                           core_in%tPopsAuto .or. core_in%tPopsProportion)) then
                    call stop_all("init_semi_stochastic", "You have not selected a semi-stochastic core space to use.")
                end if
                if (.not. tUseRealCoeffs) call stop_all(t_r, "To use semi-stochastic you must also use real coefficients.")

                ! Call the enumerating subroutines to create all excitations and add these states to
                ! SpawnedParts on the correct processor. As they do this, they count the size of the
                ! deterministic space (on their own processor only).
                write(stdout, '("Generating the deterministic space...")'); call neci_flush(stdout)
                if (core_in%tPopsAuto) then
                    write(stdout, '("Choosing 10% of initiator space as core space, if this number is larger than 50000, then use 50000")')
                    call neci_flush(stdout)
                    ! npops refers to the total (global) core space size
                    write(stdout, '("Estimated size of core space:",1X,i8)') int(AllNoInitDets(run) * 0.1_dp)
                    call neci_flush(stdout)
                    if (int(AllNoInitDets(run) * 0.1_dp) > 50000) then
                        core_in%npops = 50000
                    else
                        core_in%npops = int(AllNoInitDets(run) * 0.1_dp)
                    end if
                    ! might also need to check if the total space is too large so that
                    ! tApproxSpace should be used instead...
                end if

                if (core_in%tPopsProportion) then
                    write(stdout, '("Choosing ",F7.2,"% of initiator space as core space")') 100 * core_in%npops_proportion
                    write(stdout, *) "Estimated size of core space: ", int(AllNoInitDets(run) * core_in%npops_proportion)
                    core_in%npops = max(1, int(AllNoInitDets(run) * core_in%npops_proportion))
                end if

                if (core_in%tApproxSpace) write(stdout, '(" ... using the approximate scheme with a factor of",1X,i5)') core_in%nApproxSpace
                call generate_space(core_in, run)

                ! So that all procs store the size of the deterministic spaces on all procs.
                mpi_temp = rep%determ_sizes(iProcIndex)
                call MPIAllGather(mpi_temp, rep%determ_sizes, ierr)

                rep%determ_space_size = sum(rep%determ_sizes)
                rep%determ_space_size_int = int(rep%determ_space_size)

                ! Report the total deterministic space size for this replica.
                ! Typically, all replicas have similar or identical space sizes.
                write(stdout, '("Total size of deterministic space:",1X,i8)') rep%determ_space_size
                if (rep%determ_space_size > (AllNoInitDets(run) .div. 2_int64) .and. tTruncInitiator) then
                    write(stdout, *)"WARNING: Total size of deterministic space is greater than&
                        & 50% of the initiator space."
                    write(stdout, *)"         Reducing the size of the deterministic space is&
                        & encouraged."
                    if (iProcIndex == 0) then
                        write(stderr, *)"WARNING: Total size of deterministic space is greater than&
                            & 50% of the initiator space."
                        write(stderr, *)"         Reducing the size of the deterministic space is&
                            & encouraged."
                    end if
                end if
                write(stdout, '("Size of deterministic space on this processor:",1X,i8)') rep%determ_sizes(iProcIndex)
                call neci_flush(stdout)

                call rep%alloc_wf()

                ! This array will hold the positions of the deterministic states in CurrentDets.
                allocate(rep%indices_of_determ_states(rep%determ_sizes(iProcIndex)), stat=ierr)
                call LogMemAlloc('indices_of_determ_states', int(rep%determ_sizes(iProcIndex)), &
                                  sizeof_int, t_r, rep%IDetermTag, ierr)

                ! Calculate the indices in the full vector at which the various processors end.
                rep%determ_last(0) = rep%determ_sizes(0)
                do i = 1, nProcessors - 1
                    rep%determ_last(i) = rep%determ_last(i - 1) + rep%determ_sizes(i)
                end do
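                ! e.g. determ_sizes = (/3, 5, 2/) gives determ_last = (/3, 8, 10/),
                ! i.e. the position in the full core vector at which each
                ! processor's block of deterministic states ends.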

                call sort(spawnedparts(0:NIfTot, 1:rep%determ_sizes(iprocindex)), ilut_lt, ilut_gt)

                ! Do a check that no states are in the deterministic space twice. The list is sorted
                ! already so simply check states next to each other in the list.
                do i = 2, rep%determ_sizes(iProcIndex)
                    if (all(SpawnedParts(0:nifd, i - 1) == SpawnedParts(0:nifd, i))) then
                        call decode_bit_det(nI, SpawnedParts(:, i))
                        write(stdout, '("State found twice:")')
                        write(stdout, *) SpawnedParts(:, i)
                        call write_det(stdout, nI, .true.)
                        call stop_all("init_semi_stochastic", &
                                      "The same state has been found twice in the deterministic space.")
                    end if
                end do

                ! Store every core determinant from all processors on all processors, in core_space.
                call store_whole_core_space(rep)
                ! Create the hash table to address the core determinants.
                call initialise_shared_rht(rep%core_space, rep%determ_space_size_int, rep%core_ht)

                if (tWriteCore) call write_core_space(rep)

                write(stdout, '("Generating the Hamiltonian in the deterministic space...")'); call neci_flush(stdout)
                if (tAllSymSectors .or. tReltvy .or. nOccAlpha <= 1 .or. nOccBeta <= 1 &
                    .or. tGUGA .or. t_mol_3_body .or. t_non_hermitian_2_body) then
                    ! In the above cases the faster generation is not implemented, so
                    ! use the original algorithm.
                    call set_timer(SemiStoch_Hamil_Time)
                    if (tHPHF) then
                        call calc_determ_hamil_sparse_hphf(rep)
                    else
                        call calc_determ_hamil_sparse(rep)
                    end if
                    call halt_timer(SemiStoch_Hamil_Time)
                    write(stdout, '("Total time (seconds) taken for Hamiltonian generation:", f9.3)') &
                        get_total_time(SemiStoch_Hamil_Time)
                else
                    if (tHPHF) then
                        call calc_determ_hamil_opt_hphf(rep)
                    else
                        call calc_determ_hamil_opt(rep)
                    end if
                end if

#ifndef CMPLX_
                if (t_print_core_info) then
                    ! Also report core-space information such as the ground-state
                    ! energy and, optionally, the eigenvector.
                    if (t_print_core_hamil) then
                        call print_basis(rep)
                        call print_hamiltonian(rep)
                    end if
                    root_print "I am before the diagonalization step with", t_non_hermitian_2_body
                    if (t_non_hermitian_2_body) then
                        call diagonalize_core_non_hermitian(e_values, e_vectors, rep)
                        if (t_choose_trial_state) then
                            gs_energy = e_values(trial_excit_choice(1))
                        else
                            gs_energy = e_values(1)
                        end if
                    else
                        call diagonalize_core(gs_energy, gs_vector, rep)
                    end if
                    root_print "semi-stochastic space GS energy: ", gs_energy

                    if (t_print_core_vec) then
                        root_print "semi-stochastic gs vector:"
                        call print_vec(gs_vector, "semistoch-vec")
                    end if
                end if
#endif

                if (tRDMonFly) call generate_core_connections(rep)

                if (tDetermProjApproxHamil) call init_var_space(rep)

                ! Move the states to CurrentDets.
                call add_core_states_currentdet_hash(run)

                ! If using a trial wavefunction, and that initialisation has already
                ! been performed, then the current_trial_amps array needs correcting
                ! after the core states were added and sorted into CurrentDets.
                call reinit_current_trial_amps()

                ! If starting from a popsfile then global_determinant_data will not
                ! have been initialised, or if in the middle of a calculation then new
                ! determinants may have been added.
                if (tReadPops .or. (semistoch_shift_iter /= 0)) call fill_in_diag_helements()

                SpawnedParts = 0_n_int
                TotWalkersOld = TotWalkers

                tStartedFromCoreGround = .false.
                if (tStartCoreGroundState .and. (.not. tReadPops) .and. tStaticCore .and. (.not. tTrialInit)) then
                    if (t_non_hermitian_2_body) then
                        call set_timer(SemiStoch_nonhermit_Time)
                        call start_walkers_from_core_ground_nonhermit(tPrintInfo=.true., run=run)
                        call halt_timer(SemiStoch_nonhermit_Time)
                        tStartedFromCoreGround = .true.
                        write(stdout, '("Total time (seconds) taken for non-hermitian diagonalization:", f9.3)') &
                            get_total_time(SemiStoch_nonhermit_Time)
                    else
                        call set_timer(SemiStoch_Davidson_Time)
                        call start_walkers_from_core_ground(tPrintInfo=.true., run=run)
                        call halt_timer(SemiStoch_Davidson_Time)
                        tStartedFromCoreGround = .true.
                        write(stdout, '("Total time (seconds) taken for Davidson calculation:", f9.3)') &
                            get_total_time(SemiStoch_Davidson_Time)
                    end if
                end if
            end associate
        end do
        ! Call MPIBarrier here so that Semistoch_Init_Time will give the
        ! initialisation time for all processors to finish.
        call MPIBarrier(ierr, tTimeIn=.false.)

        call halt_timer(SemiStoch_Init_Time)

        write(stdout, '("Semi-stochastic initialisation complete.")')
        write(stdout, '("Time (seconds) taken for semi-stochastic initialisation:", f9.3, /)') &
            get_total_time(SemiStoch_Init_Time)
        call neci_flush(stdout)

    end subroutine init_semi_stochastic

    subroutine generate_space(core_in, run)

        ! A wrapper to call the correct generating routine.

        type(subspace_in) :: core_in
        integer, intent(in) :: run
        integer :: space_size, i, ierr, c_run
        real(dp) :: zero_sign(lenof_sign)

        space_size = 0
        if(t_global_core_space) then
            c_run = GLOBAL_RUN
        else
            c_run = run
        end if

        ! Call the requested generating routines.
        if (core_in%tHF) call add_state_to_space(ilutHF, SpawnedParts, space_size)
        if (core_in%tPops) call generate_space_most_populated(core_in%npops, &
                                                              core_in%tApproxSpace, core_in%nApproxSpace, &
                                                              SpawnedParts, space_size, c_run, t_opt_fast_core=t_fast_pops_core)
        if (core_in%tRead) call generate_space_from_file(core_in%read_filename, SpawnedParts, space_size)
        if (.not. (tGUGACore)) then
            if (core_in%tDoubles) call generate_sing_doub_determinants(SpawnedParts, space_size, core_in%tHFConn)
            if (core_in%tTriples) call generate_trip_determinants(SpawnedParts, space_size, &
                                                                  core_in%tHFConn)
            if (core_in%tCAS) call generate_cas(core_in%occ_cas, core_in%virt_cas, SpawnedParts, space_size)
            if (core_in%tRAS) call generate_ras(core_in%ras, SpawnedParts, space_size)
            if (core_in%tOptimised) call generate_optimised_space(core_in%opt_data, core_in%tLimitSpace, &
                                                                  SpawnedParts, space_size, core_in%max_size)
            if (core_in%tMP1) call generate_using_mp1_criterion(core_in%mp1_ndets, SpawnedParts, space_size)
            if (core_in%tFCI) then
                if (tAllSymSectors) then
                    call gndts_all_sym_this_proc(SpawnedParts, .false., space_size)
                else
                    call generate_fci_core(SpawnedParts, space_size)
                end if
            end if

        else if (tGUGACore) then
            if (core_in%tDoubles) then
                call generate_sing_doub_guga(SpawnedParts, space_size, core_in%tHFConn)
            else if (core_in%tCAS) then
                call stop_all("init_semi_stochastic", "CAS core space with CSFs is not &
                              &currently implemented.")
            else if (core_in%tRAS) then
                call stop_all("init_semi_stochastic", "Cannot use a RAS core space with &
                              &CSFs.")

            else if (core_in%tOptimised) then
                call generate_optimised_space(core_in%opt_data, core_in%tLimitSpace, &
                                              SpawnedParts, space_size, core_in%max_size)

            else if (core_in%tLowE) then
                call stop_all("init_semi_stochastic", "Low energy core space with CSFs is not &
                              &currently implemented.")
            else if (core_in%tMP1) then
                call generate_using_mp1_criterion(core_in%mp1_ndets, SpawnedParts, space_size)
            end if
            if (core_in%tFCI) call generate_fci_core(SpawnedParts, space_size)
        end if

        ! If two different deterministic spaces have been called then there may
        ! be some repeated states. We don't want repeats, so remove them and
        ! update space_size accordingly.
        call remove_repeated_states(SpawnedParts, space_size)

        if (tDetermProjApproxHamil) then
            var_size_this_proc = space_size
            allocate(temp_var_space(0:NIfTot, var_size_this_proc), stat=ierr)
            temp_var_space = SpawnedParts(:, 1:var_size_this_proc)
        end if

        ! Create and use the space of all connections to the current space
        if (core_in%tAllConnCore) then
            call generate_all_conn_space(SpawnedParts, space_size)
            call remove_repeated_states(SpawnedParts, space_size)
        end if
        associate(rep => cs_replicas(run))
            zero_sign = 0.0_dp
            do i = 1, space_size
                call encode_sign(SpawnedParts(:, i), zero_sign)
                call set_flag(SpawnedParts(:, i), flag_deterministic(run))
                if (tTruncInitiator .and. t_core_inits) then
                    do c_run = rep%first_run(), rep%last_run()
                        call set_flag(SpawnedParts(:, i), get_initiator_flag_by_run(c_run))
                        call set_flag(CurrentDets(:, i), flag_static_init(c_run))
                    end do
                end if
            end do

            ! Set the deterministic space size for this process.
            rep%determ_sizes(iProcIndex) = int(space_size, MPIArg)

            ! If requested, remove high energy orbitals so that the space size is below some max.
            if (core_in%tLimitSpace) then
                call remove_high_energy_orbs(SpawnedParts(0:NIfTot, 1:space_size), space_size, &
                                             core_in%max_size, .true.)
                rep%determ_sizes(iProcIndex) = int(space_size, MPIArg)
            end if
        end associate

    end subroutine generate_space

    subroutine generate_sing_doub_guga(ilut_list, space_size, only_keep_conn)
        ! routine to generate the singles and doubles core space from the
        ! HF (or current reference determinant) used in the semi-stochastic
        ! code when GUGA is in use
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size
        logical, intent(in) :: only_keep_conn

        integer(n_int) :: ilutG(0:nifguga)
        integer(n_int), allocatable :: excitations(:, :)
        integer :: nexcit, i
        integer(n_int) :: temp_ilut(0:niftot)
        HElement_t(dp) :: temp_hel
        integer :: temp_nI(nel)

        ! Apply actHamiltonian to the HF determinant. Since it only returns
        ! excitations with non-zero matrix elements, only_keep_conn is
        ! effectively always .true. here.

        call add_state_to_space(ilutHF, ilut_list, space_size)

        ! convert the HF determinant to its GUGA representation
        call convert_ilut_toGUGA(ilutHF, ilutG)

        call actHamiltonian(ilutG, CSF_Info_t(ilutG), excitations, nexcit)

        do i = 1, nexcit
            ! If we only want to keep connected determinants, check that the matrix
            ! element is non-zero (actHamiltonian should already discard zero-weight
            ! excitations, so this is mostly a safeguard).
            call convert_ilut_toNECI(excitations(:, i), temp_ilut, temp_hel)

            if (only_keep_conn .and. near_zero(temp_hel)) cycle

            call decode_bit_det(temp_nI, temp_ilut)

            call add_state_to_space(temp_ilut, ilut_list, space_size, temp_nI)
        end do

    end subroutine generate_sing_doub_guga

    subroutine add_state_to_space(ilut, ilut_list, space_size, nI_in)

        ! This subroutine takes a state, decides whether it lives on this processor
        ! and, if so, adds it to ilut_list and increments space_size on this process.

        ! In: ilut - The determinant in a bitstring form.
        ! In/Out: ilut_list - List of determinants to which ilut will be added.
        ! In/Out: space_size - The number of determinants belonging to this process.
        ! In (optional): nI_in - A list of the occupied orbitals in the determinant.
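        ! Illustrative call pattern (sketch): starting from space_size = 0, repeated
        ! calls grow ilut_list only on the processor that DetermineDetNode assigns
        ! each state to, e.g.
        !     space_size = 0
        !     call add_state_to_space(ilutHF, SpawnedParts, space_size)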

        integer(n_int), intent(in) :: ilut(0:NIfTot)
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size
        integer, optional, intent(in) :: nI_in(nel)

        integer :: nI(nel)
        integer :: proc

        ! If using HPHFs then only allow the correct HPHFs to be added to the list.
        if (tHPHF) then
            if (.not. IsAllowedHPHF(ilut(0:NIfD))) return
        end if

        ! Find the nI representation of determinant.
        if (present(nI_in)) then
            nI = nI_in
        else
            call decode_bit_det(nI, ilut)
        end if

        proc = DetermineDetNode(nel, nI, 0)

        if (.not. (proc == iProcIndex)) return

        space_size = space_size + 1
        ilut_list(0:, space_size) = 0_n_int
        ilut_list(0:NIfTot, space_size) = ilut(0:NIfTot)

    end subroutine add_state_to_space

    subroutine generate_sing_doub_determinants(ilut_list, space_size, only_keep_conn)

        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the number of newly
        !             generated states plus its value on input.

        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size
        logical, intent(in) :: only_keep_conn

        integer(n_int) :: ilut(0:NIfTot)
        integer :: nI(nel)
        integer :: excit(2, 2)
        integer :: nsing, ndoub, ex_flag
        logical :: tAllExcitFound, tParity
        HElement_t(dp) :: HEl

        type(ExcitGenSessionType) :: session

        ! Always generate both the single and double excitations.
        ex_flag = 3

        ! Start by adding the HF state.
        call add_state_to_space(ilutHF, ilut_list, space_size)

        if (tGUGA) then
            call stop_all("generate_sing_doub_determinants", &
                          "modify get_helement for GUGA")
        end if
        if (tKPntSym) then
            call enumerate_sing_doub_kpnt(ex_flag, only_keep_conn, nsing, ndoub, .true., ilut_list, space_size)
        else

            tAllExcitFound = .false.
            excit = 0

            do while (.true.)
                ! Generate the next determinant.
                if (tReltvy) then
                    call GenExcitations4(session, HFDet, nI, ex_flag, excit, tParity, tAllExcitFound, .false.)
                else
                    call GenExcitations3(HFDet, ilutHF, nI, ex_flag, excit, tParity, tAllExcitFound, .false.)
                end if
                if (tAllExcitFound) exit

                call EncodeBitDet(nI, ilut)
                ! If using a deterministic space connected to the Hartree-Fock
                ! then check that this determinant is actually connected to it!
                if (only_keep_conn) then
                    HEl = get_helement(HFDet, nI, ilutHF, ilut)
                    ! [W.D. 15.5.2017:]
                    ! is this still enough, even for Hamiltonians containing
                    ! complex entries??
                    ! and why is the cast to real(dp) done here??
!                     if (abs(real(HEl,dp)) < 1.e-12_dp) cycle
                    if (abs(HEl) < 1.e-12_dp) cycle
                end if
                call add_state_to_space(ilut, ilut_list, space_size, nI)
            end do
        end if

    end subroutine generate_sing_doub_determinants

!------------------------------------------------------------------------------------------!

    subroutine generate_trip_determinants(ilut_list, space_size, only_keep_conn)
        ! Generate all triple excitations of the HF determinant and add them to the list
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size
        logical, intent(in) :: only_keep_conn

        integer :: i, j, k
        integer :: a, b, c
        integer :: ex(2, 3), nI(nel)
        integer(n_int) :: ilut_ex(0:NifTot)
        HElement_t(dp) :: HEl
        ! add all triples of the HF
        ! enumerate them as follows: take three electrons i,j,k
        ! with i>j>k
        ! then, pick three (unocc) orbitals a,b,c with a>b>c
        ! => triple excitation
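        ! e.g. for nel = 4 the electron loops visit (i,j,k) = (3,2,1), (4,2,1),
        ! (4,3,1) and (4,3,2), so each electron triple is enumerated exactly once;
        ! the orbital loops (a,b,c) work analogously over the unoccupied orbitals.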
        do i = 1, nel
            ex(1, 1) = HFDet(i)
            do j = 1, i - 1
                ex(1, 2) = HFDet(j)
                do k = 1, j - 1
                    ex(1, 3) = HFDet(k)
                    do a = 1, nBasis
                        ! for the target orbs, only take unoccupied
                        if (IsNotOcc(ilutHF, a)) then
                            ex(2, 3) = a
                            do b = 1, a - 1
                                if (IsNotOcc(ilutHF, b)) then
                                    ex(2, 2) = b
                                    do c = 1, b - 1
                                        if (IsNotOcc(ilutHF, c)) then
                                            ex(2, 1) = c
                                            ! create the triple excitation with these elecs/orbs
                                            ilut_ex = make_ilutJ(ilutHF, ex, 3)
                                            ! decode first: nI is needed for the matrix element below
                                            call decode_bit_det(nI, ilut_ex)
                                            ! if enabled, only keep connected determinants
                                            if (only_keep_conn) then
                                                HEl = get_helement(HFDet, nI, ilutHF, ilut_ex)
                                                if (abs(HEl) < eps) cycle
                                            end if

                                            ! only keep determinants in the same symmetry sector
                                            if (.not. IsSymAllowedExcitMat(ex, 3)) cycle
                                            call add_state_to_space(ilut_ex, ilut_list, space_size, nI)
                                        end if
                                    end do
                                end if
                            end do
                        end if
                    end do
                end do
            end do
        end do

    end subroutine generate_trip_determinants

!------------------------------------------------------------------------------------------!

    subroutine generate_ras(ras_info, ilut_list, space_size)

        ! In: ras - Parameters for the RAS space.
        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the number of newly
        !             generated states plus its value on input.


        type(ras_parameters), intent(inout) :: ras_info
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size

        type(ras_class_data), allocatable, dimension(:) :: ras_classes
        integer(n_int), allocatable, dimension(:, :) :: temp_list
        integer :: temp_size, i

        tot_nelec = nel / 2
        tot_norbs = nbasis / 2

        ! Do a check that the RAS parameters are possible.
        if (ras_info%size_1 + ras_info%size_2 + ras_info%size_3 /= tot_norbs .or. &
            ras_info%min_1 > ras_info%size_1 * 2 .or. &
            ras_info%max_3 > ras_info%size_3 * 2) &
            call stop_all("generate_ras", "RAS parameters are not possible.")

        if (mod(nel, 2) /= 0) call stop_all("generate_ras", "RAS space only implemented for &
                                            & closed shell molecules.")

        call initialise_ras_space(ras_info, ras_classes)

        call find_ras_size(ras_info, ras_classes, temp_size)

        allocate(temp_list(0:NIfTot, temp_size))

        call generate_entire_ras_space(ras_info, ras_classes, temp_size, temp_list)

        do i = 1, temp_size
            call add_state_to_space(temp_list(:, i), ilut_list, space_size)
        end do

        deallocate(ras_classes)
        deallocate(temp_list)

    end subroutine generate_ras

    subroutine generate_cas(occ_orbs, virt_orbs, ilut_list, space_size)

        ! In: occ_orbs - The number of electrons in the CAS space.
        ! In: virt_orbs - The number of virtual spin orbitals in the CAS space.
        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the number of newly
        !             generated states plus its value on input.

        integer, intent(in) :: occ_orbs, virt_orbs
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size

        type(BasisFN) :: CASSym
        integer(n_int) :: ilut(0:NIfTot)
        integer :: iCASDet
        integer :: num_active_orbs, nCASDet, i, j, comp, ierr
        integer, allocatable :: CASBrr(:), CASRef(:)
        integer(n_int) :: cas_bitmask(0:NIfD), cas_not_bitmask(0:NIfD)
        integer, pointer :: CASDets(:, :) => null()
        integer(TagIntType) :: CASDetsTag
        character(len=*), parameter :: t_r = "generate_cas"

        ! Start by adding the HF state.
        call add_state_to_space(ilutHF, ilut_list, space_size)

        ! This option should be true. It tells the subroutine gndts to only consider states
        ! with an Ms value in the correct spin subspace.
        if (.not. tSpn) call stop_all("generate_cas", "tSpn is not set to true.")

        ! The total number of orbitals in the active space:
        num_active_orbs = occ_orbs + virt_orbs
        allocate(CASBrr(1:num_active_orbs))
        allocate(CASRef(1:occ_orbs))
        do i = 1, num_active_orbs
            ! Run through the CAS space, and create an array which will map these orbitals to the
            ! orbitals they actually represent.
            ! i.e. CASBRR(1) will store the lowest energy orbital in the CAS space and
            ! CASBRR(num_active_orbs) will store the highest energy orbital in the CAS space.
            CASBrr(i) = BRR(i + (nel - occ_orbs))
        end do

        ! Create a bit mask which has 1's in the bits which represent active orbitals and 0's in
        ! all other orbitals.
        cas_bitmask = 0_n_int
        do i = 1, num_active_orbs
            set_orb(cas_bitmask, CASBrr(i))
        end do
        ! Create a bit mask which has 0's in the bits which represent active orbitals and 1's in
        ! all other orbitals.
        cas_not_bitmask = not(cas_bitmask)
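        ! Illustrative example: if the active space consists of spin orbitals 3-6,
        ! cas_bitmask has bits ...00111100 set and cas_not_bitmask ...11000011, so
        ! iand(ilutHF, cas_not_bitmask) below keeps only the inactive part of the
        ! HF determinant.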

        ! CASRef holds the part of the HF determinant in the active space.
        CASRef = CasBRR(1:occ_orbs)
        call sort(CasRef)
        call GetSym(CASRef, occ_orbs, G1, nBasisMax, CASSym)

        ! First, generate all excitations so we know how many there are, to allocate the arrays.
        call gndts(occ_orbs, num_active_orbs, CASBrr, nBasisMax, CASDets, &
                   .true., G1, tSpn, LMS, .true., CASSym, nCASDet, iCASDet)

        if (nCASDet == 0) call stop_all("generate_cas", "No CAS determinants found.")

        ! Now allocate the array CASDets. CASDets(:,i) will store the orbitals in the active space
        ! which are occupied in the i'th determinant generated.
        allocate(CASDets(occ_orbs, nCASDet), stat=ierr)
        call LogMemAlloc("CASDets", occ_orbs * nCASDet, 8, t_r, CASDetsTag, ierr)
        CASDets(:, :) = 0

        ! Now fill up CASDets...
        call gndts(occ_orbs, num_active_orbs, CASBrr, nBasisMax, CASDets, &
                   .false., G1, tSpn, LMS, .true., CASSym, nCASDet, iCASDet)

        do i = 1, nCASDet
            ! First, create the bitstring representing this state:
            ! Start from the HF determinant and apply cas_not_bitmask to clear all active space
            ! orbitals.
            ilut(0:NIfD) = iand(ilutHF(0:NIfD), cas_not_bitmask)
            ! Then loop through the occupied orbitals in the active space, stored in CASDets(:,i),
            ! and set the corresponding bits.
            do j = 1, occ_orbs
                set_orb(ilut, CASDets(j, i))
            end do

            ! The function gndts always outputs the reference state. This is already included, so
            ! we want to cycle when we get to this state to ignore it.
            ! comp will be 0 if ilut and ilutHF are the same.
            comp = DetBitLT(ilut, ilutHF, NIfD)
            if (comp == 0) cycle

            ! Now that we have fully generated the determinant, add it to the main list.
            call add_state_to_space(ilut, ilut_list, space_size)

        end do

        deallocate(CASBrr)
        deallocate(CASRef)
        deallocate(CASDets, stat=ierr)
        call LogMemDealloc(t_r, CASDetsTag, ierr)

    end subroutine generate_cas

    subroutine generate_optimised_space(opt_data, tLimitSpace, ilut_list, space_size, max_space_size)

        ! This routine generates a deterministic space by diagonalising a small fraction
        ! of the whole space, and choosing the basis states with the largest weights in
        ! the ground state for the deterministic states. This therefore aims to find
        ! some of the basis states with the largest weights in the true ground state.

        ! More specifically, we start with the Hartree-Fock state, and generate a first
        ! space by finding all states connected to it. We then find the ground state of
        ! the Hamiltonian in this space. Using this ground state, we pick out the basis
        ! states with the largest amplitudes (up to some user-specified criteria), and
        ! define these basis states as our new space. We then find all states connected
        ! to the states in this space, and diagonalise the Hamiltonian in this new space
        ! and again pick out the basis states with the largest weights. This process is
        ! iterated as many times as the user requests.
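        ! One iteration of the generation loop, as a sketch:
        !     expand:      S <- S + {all determinants connected to S}
        !     diagonalise: find the ground state of H restricted to S
        !     truncate:    keep the largest-amplitude states (by cutoff or by count)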

        ! In: opt_data: Derived type containing the parameters for the algorithm to
        !     generate the space.
        ! In: tLimitSpace: If true then, on every iteration of the generating algorithm,
        !     after all connections have been generated, the space is truncated to a
        !     maximum size of max_space_size. This is done by removing the highest
        !     energy determinants.
        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the number of newly
        !             generated states plus its value on input.
        ! In (optional): max_space_size - Only used if tLimitSpace is true. See
        !     tLimitSpace for an explanation of use.

        type(opt_space_data), intent(in) :: opt_data
        logical, intent(in) :: tLimitSpace
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size
        integer, optional, intent(in) :: max_space_size

        integer(n_int), allocatable, dimension(:, :) :: ilut_store, temp_space
        integer :: counter, i, j, ierr
        integer :: old_num_states, new_num_states
        integer(MPIArg) :: proc_space_sizes(0:nProcessors - 1), disps(0:nProcessors - 1), &
                           sendcounts(0:nProcessors - 1), recvcount, this_proc_size
        integer(TagIntType) :: IlutTag, TempTag
        character(len=*), parameter :: t_r = "generate_optimised_space"

        type(DavidsonCalcType) :: davidsonCalc

        if (iProcIndex /= root) then
            ! Allocate some space so that the MPIScatterV call does not crash.
            allocate(ilut_store(0:NIfTot, 1), stat=ierr)
            call LogMemAlloc("ilut_store", (NIfTot + 1), size_n_int, t_r, IlutTag, ierr)
        else if (iProcIndex == root) then
            ! Allocate the stores of ilut's that will hold these deterministic states.
            ! For now, assume that we won't have a deterministic space of more than one
            ! million states. Could make this user-specified later.
            allocate(ilut_store(0:NIfTot, 1000000), stat=ierr)
            call LogMemAlloc("ilut_store", 1000000 * (NIfTot + 1), size_n_int, t_r, IlutTag, ierr)
            allocate(temp_space(0:NIfTot, 1000000), stat=ierr)
            call LogMemAlloc("temp_store", 1000000 * (NIfTot + 1), size_n_int, t_r, TempTag, ierr)
            ilut_store = 0_n_int
            temp_space = 0_n_int

            ! Put the Hartree-Fock state in the list first.
            ilut_store(0:NIfTot, 1) = ilutHF(0:NIfTot)

            ! old_num_states will hold the number of deterministic states in the current
            ! space. This is just 1 for now, with only the Hartree-Fock.
            old_num_states = 1

            ! Now we start the iterating loop. Find all states which are either a single or
            ! double excitation from each state in the old ilut store, and then see if they
            ! have a non-zero Hamiltonian matrix element with any state in the old ilut store:

            ! Over the total number of iterations.
            do i = 1, opt_data%ngen_loops

                write(stdout, '(a37,1X,i2)') "Optimised space generation: Iteration", i
                call neci_flush(stdout)

                ! Find all states connected to the states currently in ilut_store.
                write(stdout, '(a29)') "Generating connected space..."
                call neci_flush(stdout)
                ! Allow for up to 1 million connected states to be created.
                new_num_states = 1000000
                call generate_connected_space(old_num_states, ilut_store(:, 1:old_num_states), &
                                              new_num_states, temp_space(:, 1:1000000))
                write(stdout, '(a26)') "Connected space generated."
                call neci_flush(stdout)

                ! Add these states to the ones already in the ilut stores.
                ilut_store(:, old_num_states + 1:old_num_states + new_num_states) = &
                    temp_space(:, 1:new_num_states)

                new_num_states = new_num_states + old_num_states

                call remove_repeated_states(ilut_store, new_num_states)

                write(stdout, '(i8,1X,a13)') new_num_states, "states found."
                call neci_flush(stdout)

                if (tLimitSpace) call remove_high_energy_orbs(ilut_store(:, 1:new_num_states), &
                                                              new_num_states, max_space_size, .false.)

                write(stdout, '(a27)') "Constructing Hamiltonian..."
                call neci_flush(stdout)

                if (t_non_hermitian_2_body) then
                    call calculate_sparse_hamiltonian_non_hermitian(new_num_states, ilut_store(:, 1:new_num_states))
                else
                    call calculate_sparse_hamiltonian(new_num_states, ilut_store(:, 1:new_num_states))
                end if

                write(stdout, '(a29)') "Performing diagonalisation..."
                call neci_flush(stdout)

                ! Now that the Hamiltonian is generated, we can finally find the ground state of it:
                if (t_non_hermitian_2_body) then
                    call stop_all(t_r, &
                                  "perform_davidson not adapted for non-hermitian Hamiltonians!")
                end if
                call perform_davidson(davidsonCalc, sparse_hamil_type, .false.)

                associate ( &
                    davidson_eigenvector => davidsonCalc%davidson_eigenvector &
                    )

                    ! davidson_eigenvector now stores the ground state eigenvector. We want to use the
                    ! vector whose components are the absolute values of this state:
                    davidson_eigenvector = abs(davidson_eigenvector)
                    ! Multiply by -1.0_dp so that the sort operation later is the right way around.
                    davidson_eigenvector = -1.0_dp * davidson_eigenvector

                    ! Now decide which states to keep for the next iteration. There are two ways of
                    ! doing this, as decided by the user. Either all basis states with an amplitude
                    ! in the ground state greater than a given value are kept (tAmpCutoff = .true.),
                    ! or a number of states to keep is specified and we pick the states with the
                    ! biggest amplitudes (tAmpCutoff = .false.).
                    if (opt_data%tAmpCutoff) then
                        counter = 0
                        do j = 1, new_num_states
                            if (abs(davidson_eigenvector(j)) > opt_data%cutoff_amps(i)) then
                                counter = counter + 1
                                ilut_store(:, counter) = ilut_store(:, j)
                            end if
                        end do
                        old_num_states = counter
                    else
                        ! Sort the list in order of the amplitude of the states in the ground state,
                        ! from largest to smallest.
                        call sort(davidson_eigenvector(:), ilut_store(:, 1:new_num_states))

                        ! Set old_num_states to specify the number of states which should be kept.
                        old_num_states = opt_data%cutoff_nums(i)
                        if (old_num_states > new_num_states) old_num_states = new_num_states
                    end if

                    write(stdout, '(i8,1X,a12)') old_num_states, "states kept."
                    call neci_flush(stdout)

                    call deallocate_sparse_ham(sparse_ham, SparseHamilTags)
                    deallocate(hamil_diag, stat=ierr)
                    call LogMemDealloc(t_r, HDiagTag, ierr)

                end associate
            end do

        end if ! If on root.

        ! At this point, the space has been generated on the root processor. The rest is
        ! just sending the right info to the right processors...

        if (iProcIndex == root) then
            ! Find which core each state belongs to and sort accordingly.
            call sort_space_by_proc(ilut_store(:, 1:old_num_states), old_num_states, proc_space_sizes)

            ! Create displacement and sendcount arrays for MPIScatterV later:
            sendcounts = int(proc_space_sizes * (NIfTot + 1), MPIArg)
            disps(0) = 0_MPIArg
            do i = 1, nProcessors - 1
                disps(i) = int(disps(i - 1) + proc_space_sizes(i - 1) * (NIfTot + 1), MPIArg)
            end do
        end if

        ! Send the number of states on each processor to the corresponding processor.
        call MPIScatter(proc_space_sizes, this_proc_size, ierr)
        recvcount = int(this_proc_size * (NIfTot + 1), MPIArg)

        ! Finally send the actual determinants to the ilut_list array.
        call MPIScatterV(ilut_store, sendcounts, disps, ilut_list(:, space_size + 1:space_size + this_proc_size), recvcount, ierr)

        space_size = space_size + int(this_proc_size)

        ! Finally, deallocate arrays.
        if (allocated(ilut_store)) then
            deallocate(ilut_store, stat=ierr)
            call LogMemDealloc(t_r, IlutTag, ierr)
        end if
        if (iProcIndex == root) then
            deallocate(temp_space, stat=ierr)
            call LogMemDealloc(t_r, TempTag, ierr)
        end if

    end subroutine generate_optimised_space

    subroutine generate_space_most_populated(target_space_size, tApproxSpace, &
                                             nApproxSpace, ilut_list, space_size, run, opt_source, opt_source_size, t_opt_fast_core)

        ! In: target_space_size - The number of determinants to attempt to keep. If
        !         fewer determinants are present than requested, all of them are used.
        ! In: tApproxSpace - If true then only find *approximately* the best
        !         space, to save memory (although in many cases it will end up
        !         finding the best space).
        ! In: nApproxSpace - factor that defines how many states are kept on each
        !         process if tApproxSpace is true, 1 <= nApproxSpace <= nProcessors.
        !         The larger nApproxSpace, the more memory is consumed and the slower
        !         (but more accurate) the semi-stochastic initialisation is.
        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the number of newly
        !             generated states plus its value on input.

        integer, intent(in) :: target_space_size, nApproxSpace
        integer(n_int), intent(in), optional :: opt_source_size
        integer(n_int), intent(in), optional :: opt_source(0:, :)
        logical, intent(in) :: tApproxSpace
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size
        integer, intent(in) :: run
        logical, intent(in), optional :: t_opt_fast_core
        real(dp), allocatable, dimension(:) :: amps_this_proc, amps_all_procs
        real(dp) :: real_sign(lenof_sign)
        integer(MPIArg) :: length_this_proc, total_length
        integer(MPIArg) :: lengths(0:nProcessors - 1), disps(0:nProcessors - 1)
        integer(n_int) :: temp_ilut(0:NIfTot)
        real(dp) :: core_sum, all_core_sum
        integer(n_int), dimension(:, :), allocatable :: largest_states
        integer, allocatable, dimension(:) :: indices_to_keep
        integer :: j, ierr, ind, n_pops_keep, n_states_this_proc
        integer(int64) :: i
        integer(TagIntType) :: TagA, TagB, TagC, TagD
        character(len=*), parameter :: this_routine = "generate_space_most_populated"
        integer :: nzero_dets
        real(dp) :: max_vals(0:nProcessors - 1), min_vals(0:nProcessors - 1), max_sign, min_sign
        integer(int64) :: source_size
        integer(n_int), allocatable :: loc_source(:, :)
        integer(MPIArg) :: max_size
        logical :: t_use_fast_pops_core

        if (present(opt_source)) then
            ASSERT(present(opt_source_size))
            source_size = int(opt_source_size, int64)

            allocate(loc_source(0:niftot, 1:source_size), &
                      source=opt_source(0:niftot, 1:source_size))
!             loc_source => opt_source

        else
            source_size = TotWalkers
            allocate(loc_source(0:niftot, 1:source_size), &
                      source=CurrentDets(0:niftot, 1:source_size))
!             loc_source => CurrentDets
        end if

        def_default(t_use_fast_pops_core, t_opt_fast_core, .false.)

        n_pops_keep = target_space_size

        ! Quickly loop through and find the number of determinants with
        ! zero sign.
        nzero_dets = 0
        do i = 1, source_size
            call extract_sign(loc_source(:, i), real_sign)
            if (core_space_weight(real_sign, run) < 1.e-8_dp) &
                nzero_dets = nzero_dets + 1
        end do

        if (tApproxSpace .and. nProcessors > nApproxSpace) then
            ! Look at nApproxSpace times the number of states that we expect to keep on
            ! this process. This is done instead of sending the best
            ! target_space_size states to all processes, which is often
            ! overkill and uses up too much memory.
            max_size = ceiling(real(nApproxSpace * n_pops_keep) / real(nProcessors), MPIArg)
        else
            max_size = int(n_pops_keep, MPIArg)
        end if
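        ! Worked example (illustrative): with n_pops_keep = 10000, nProcessors = 8
        ! and nApproxSpace = 3, each process considers at most
        ! ceiling(30000 / 8) = 3750 of its own most populated states instead of the
        ! full 10000.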

        length_this_proc = min(max_size, int(source_size - nzero_dets, MPIArg))

        call MPIAllGather(length_this_proc, lengths, ierr)
        total_length = sum(lengths)
        if (total_length < n_pops_keep) then
            call warning_neci(this_routine, "The number of states in the walker list is less &
                                   &than the number you requested. All states &
                                   &will be used.")
            n_pops_keep = total_length
        end if

        ! Allocate necessary arrays and log the memory used.
        allocate(amps_this_proc(length_this_proc), stat=ierr)
        call LogMemAlloc("amps_this_proc", int(length_this_proc), 8, this_routine, TagA, ierr)
        allocate(amps_all_procs(total_length), stat=ierr)
        call LogMemAlloc("amps_all_procs", int(total_length), 8, this_routine, TagB, ierr)
        allocate(indices_to_keep(n_pops_keep), stat=ierr)
        call LogMemAlloc("indices_to_keep", n_pops_keep, sizeof_int, this_routine, TagC, ierr)
        allocate(largest_states(0:NIfTot, length_this_proc), stat=ierr)
        call LogMemAlloc("largest_states", int(length_this_proc) * (NIfTot + 1), &
                         size_n_int, this_routine, TagD, ierr)

        disps(0) = 0_MPIArg
        do i = 1, nProcessors - 1
            disps(i) = disps(i - 1) + lengths(i - 1)
        end do
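        ! disps(i) now holds the offset of processor i's block within the
        ! gathered amplitude array amps_all_procs.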

        ! Return the most populated states in source on *this* processor.
        call proc_most_populated_states(int(length_this_proc), run, largest_states)

        do i = 1, length_this_proc
            ! Store the real amplitudes in their real form.
            call extract_sign(largest_states(:, i), real_sign)
            ! We are interested in the absolute values of the amplitudes.
            amps_this_proc(i) = core_space_weight(real_sign, run)
        end do

        if (t_use_fast_pops_core) then
            if (length_this_proc > 0) then
                min_sign = amps_this_proc(1)
                max_sign = amps_this_proc(ubound(amps_this_proc, dim=1))
            else
                min_sign = 0.0_dp
                max_sign = 0.0_dp
            end if

            ! Make the max/min values available on all procs
            call MPIAllGather(min_sign, min_vals, ierr)
            call MPIAllGather(max_sign, max_vals, ierr)

            call return_proc_share(n_pops_keep, min_vals, max_vals, int(lengths), &
                                   amps_this_proc, n_states_this_proc)
        else

            ! Now we want to combine all the most populated states from each processor to find
            ! how many states to keep from each processor.
            ! Take the top length_this_proc states from each processor.
            call MPIAllGatherV(amps_this_proc(1:length_this_proc), amps_all_procs(1:total_length), lengths, disps)
            ! This routine returns indices_to_keep, which will store the indices in amps_all_procs
            ! of those amplitudes which are among the n_pops_keep largest (but not sorted).
            call return_largest_indices(n_pops_keep, int(total_length), amps_all_procs, indices_to_keep)

            n_states_this_proc = 0
            do i = 1, n_pops_keep

                ind = indices_to_keep(i)

                ! Find the processor label for this state. The amplitudes in
                ! amps_all_procs are stored in contiguous blocks, ordered by processor
                ! label. Hence, if a state has index <= lengths(0) then it is on processor 0.
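                ! For example, with lengths = (/3, 5, 4/), a state with global
                ! index 6 belongs to processor 1: 6 - lengths(0) = 3 > 0, but
                ! 3 - lengths(1) = -2 <= 0, so the loop below exits with j = 1.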
                do j = 0, nProcessors - 1
                    ind = ind - lengths(j)
                    if (ind <= 0) then
                        ! j now gives the processor label. If the state is on *this* processor,
                        ! update n_states_this_proc.
                        if (j == iProcIndex) n_states_this_proc = n_states_this_proc + 1
                        exit
                    end if
                end do

            end do
        end if

        ! Add the states to the ilut_list array.
        temp_ilut = 0_n_int
        core_sum = 0.0_dp
        do i = 1, n_states_this_proc
            ! The states in largest_states are sorted by amplitude from smallest
            ! to largest, so take them from the end of the array.
            temp_ilut(0:NIfTot) = largest_states(0:NIfTot, length_this_proc - i + 1)
            call add_state_to_space(temp_ilut, ilut_list, space_size)
            core_sum = core_sum + amps_this_proc(length_this_proc - i + 1)
        end do
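        ! Sum the absolute population captured by the selected states over all
        ! processors, to report the total weight of the core space.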
        call MPISum(core_sum, all_core_sum)
        write(stdout, *) "Total core population", all_core_sum
        deallocate(amps_this_proc)
        call LogMemDealloc(this_routine, TagA, ierr)
        deallocate(amps_all_procs)
        call LogMemDealloc(this_routine, TagB, ierr)
        deallocate(indices_to_keep)
        call LogMemDealloc(this_routine, TagC, ierr)
        deallocate(largest_states)
        call LogMemDealloc(this_routine, TagD, ierr)

    end subroutine generate_space_most_populated

    subroutine generate_space_from_file(filename, ilut_list, space_size)

        ! In: filename - Name of file to read for determinants.
        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the total number of states
        !             generated plus its value on input.
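        !             The file is expected to contain one determinant per line,
        !             given as the NIfD+1 integers of its bit representation
        !             (the format written by write_most_pop_core_at_end).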

        character(255), intent(in) :: filename
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size

        integer :: iunit, stat
        integer(n_int) :: ilut(0:NIfTot), ilut_tmp(0:NIfTot)
        logical :: does_exist

        inquire (file=trim(filename), exist=does_exist)
        if (.not. does_exist) call stop_all("generate_space_from_file", &
                                            "No "//trim(filename)//" file detected.")

        iunit = get_free_unit()
        open(iunit, file=trim(filename), status='old')

        ilut = 0_n_int
        ilut_tmp = 0_n_int

        do
            read(iunit, *, iostat=stat) ilut(0:nifd)

            ! Exit when the end of the file is reached.
            if (stat < 0) exit

            ! If this determinant isn't the correct determinant for the time
            ! reversal symmetry, then try the spin-flipped version.
            if (tHPHF) then
                if (.not. IsAllowedHPHF(ilut(0:NIfD))) then
                    call spin_sym_ilut(ilut(0:NIfD), ilut_tmp(0:NIfD))
                    if (.not. IsAllowedHPHF(ilut_tmp(0:NIfD))) then
                        cycle
                    else
                        ilut(0:NIfD) = ilut_tmp(0:NIfD)
                    end if
                end if
            end if

            call add_state_to_space(ilut, ilut_list, space_size)
        end do

        close(iunit)

    end subroutine generate_space_from_file

    subroutine generate_using_mp1_criterion(target_ndets, ilut_list, space_size)

        ! In: target_ndets - The number of determinants to keep, unless there are fewer
        !     singles and doubles than this value, in which case all singles and doubles
        !     will be kept.
        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the total number of states
        !             generated plus its value on input.

        integer, intent(in) :: target_ndets
        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size

        integer(n_int), allocatable :: temp_list(:, :)
        real(dp), allocatable :: amp_list(:)
        integer(n_int) :: ilut(0:NIfTot)
        integer :: nI(nel)
        integer :: ex(2, maxExcit), ex_flag, ndets
        integer :: pos, i
        real(dp) :: amp, energy_contrib
        logical :: tAllExcitFound, tParity
        integer(n_int), allocatable :: excitations(:, :)
        integer(n_int) :: ilutG(0:nifguga)

        allocate(amp_list(target_ndets))
        allocate(temp_list(0:NIfD, target_ndets))
        amp_list = 0.0_dp
        temp_list = 0_n_int

        ! Should we generate just singles (1), just doubles (2), or both (3)?
        if (tUEG .or. tNoSingExcits) then
            ex_flag = 2
        else if (tHub) then
            if (tReal) then
                ex_flag = 1
            else
                ex_flag = 2
            end if
        else
            ! Generate both the single and double excitations.
            ex_flag = 3
        end if

        tAllExcitFound = .false.
        ! Count the HF determinant.
        ndets = 1
        ex = 0
        ilut = 0_n_int

        ! Start by adding the HF state.
        temp_list(0:NIfD, 1) = ilutHF(0:NIfD)

        amp_list = huge(amp)
        ! All slots were initialised above to the largest possible value, so that any
        ! real amplitude will displace them. The Hartree-Fock slot is now set to the
        ! lowest possible value so that it never gets removed from the list in the
        ! following selection - we always want to keep the HF determinant.
        amp_list(1) = -huge(amp)

        ! Loop through all connections to the HF determinant and keep the required number which
        ! have the largest MP1 weights.
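        ! return_mp1_amp_and_mp2_energy returns the first-order (MP1) coefficient of
        ! a connected determinant, roughly <D|H|HF> / (E_HF - E_D), together with its
        ! second-order (MP2) energy contribution; determinants are ranked by the
        ! magnitude of this coefficient.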

        if (tGUGA) then
            ! In GUGA, create all excitations at once and then compute the
            ! MP1 amplitude of each in a separate loop.
            call convert_ilut_toGUGA(ilutHF, ilutG)
            call actHamiltonian(ilutG, CSF_Info_t(ilutG), excitations, ndets)
            do i = 1, ndets
                call convert_ilut_toNECI(excitations(:, i), ilut)
                call decode_bit_det(nI, ilut)

                call return_mp1_amp_and_mp2_energy(nI, ilut, ex, tParity, amp, &
                                                   energy_contrib)

                pos = binary_search_real(amp_list, -abs(amp), 1.0e-8_dp)
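                ! As in the non-GUGA branch below, a negative return value means the
                ! amplitude is not yet in the list, and -pos gives the position at
                ! which it should be inserted to keep amp_list in order.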

                if (pos < 0) pos = -pos

                if (pos > 0 .and. pos <= target_ndets) then
                    ! Shuffle all less significant determinants down one slot, and throw away the
                    ! previous least significant determinant.
                    temp_list(0:NIfD, pos + 1:target_ndets) = temp_list(0:NIfD, pos:target_ndets - 1)
                    amp_list(pos + 1:target_ndets) = amp_list(pos:target_ndets - 1)

                    ! Add in the new ilut and amplitude in the correct position.
                    temp_list(0:NIfD, pos) = ilut(0:NIfD)
                    ! Store the negative absolute value, because the binary search sorts from
                    ! lowest (most negative) to highest.
                    amp_list(pos) = -abs(amp)
                end if
            end do

        else
            do while (.true.)
                call GenExcitations3(HFDet, ilutHF, nI, ex_flag, ex, tParity, tAllExcitFound, .false.)
                ! When no more basis functions are found, this value is returned and the loop is exited.
                if (tAllExcitFound) exit

                call EncodeBitDet(nI, ilut)
                if (tHPHF) then
                    if (.not. IsAllowedHPHF(ilut(0:NIfD))) cycle
                end if
                ndets = ndets + 1

                ! If a determinant was returned (i.e. the final excitation was not found on the previous call).
                if (.not. tAllExcitFound) then
                    call return_mp1_amp_and_mp2_energy(nI, ilut, ex, tParity, amp, energy_contrib)

                    pos = binary_search_real(amp_list, -abs(amp), 1.0e-8_dp)

                    ! If pos is negative then there isn't already a determinant with the
                    ! same amplitude in the list (which will be the common case), and -pos
                    ! specifies where it should be inserted to keep amp_list in order.
                    if (pos < 0) pos = -pos

                    if (pos > 0 .and. pos <= target_ndets) then
                        ! Shuffle all less significant determinants down one slot, and throw away the
                        ! previous least significant determinant.
                        temp_list(0:NIfD, pos + 1:target_ndets) = temp_list(0:NIfD, pos:target_ndets - 1)
                        amp_list(pos + 1:target_ndets) = amp_list(pos:target_ndets - 1)

                        ! Add in the new ilut and amplitude in the correct position.
                        temp_list(0:NIfD, pos) = ilut(0:NIfD)
                        ! Store the negative absolute value, because the binary search sorts from
                        ! lowest (most negative) to highest.
                        amp_list(pos) = -abs(amp)
                    end if

                end if
            end do
        end if

        if (ndets < target_ndets) then
            call warning_neci("generate_using_mp1_criterion", &
                "Note that there are fewer connections to the Hartree-Fock than the requested &
                &size of the space. The space will therefore be smaller than requested, &
                &containing all connections.")
        end if

        ! Now that the correct determinants have been selected, add them to the desired space.
        do i = 1, min(ndets, target_ndets)
            ilut(0:NIfD) = temp_list(0:NIfD, i)
            call add_state_to_space(ilut, ilut_list, space_size)
        end do

    end subroutine generate_using_mp1_criterion

    subroutine enumerate_sing_doub_kpnt(ex_flag, only_keep_conn, nSing, nDoub, tStore, ilut_list, space_size)

        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the total number of states
        !             generated plus its value on input.

        integer, intent(in) :: ex_flag
        logical, intent(in) :: only_keep_conn
        integer, intent(out) :: nSing, nDoub
        logical, intent(in) :: tStore
        integer(n_int), optional, intent(inout) :: ilut_list(0:, :)
        integer, optional, intent(inout) :: space_size

        integer, allocatable :: excit_gen(:)
        integer(n_int) :: ilut(0:NIfTot)
        integer :: iExcit, iMaxExcit, ierr
        integer :: nJ(nel), nStore(6), nExcitMemLen(1)
        logical :: tTempUseBrill
        character(*), parameter :: t_r = 'enumerate_sing_doub_kpnt'
        HElement_t(dp) :: HEl

        nSing = 0
        nDoub = 0
        iMaxExcit = 0
        nStore(1:6) = 0

        ! Use Alex's old excitation generators. However, we have to ensure
        ! that Brillouin's theorem isn't switched on!
        if (tUseBrillouin) then
            tTempUseBrill = .true.
            tUseBrillouin = .false.
        else
            tTempUseBrill = .false.
        end if
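        ! The old generator is called three times: first to query the memory needed
        ! (nExcitMemLen), then to initialise the allocated excit_gen array, and
        ! finally inside the loop below to return one excitation at a time until
        ! nJ(1) == 0 signals completion.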

        call gensymexcitit2par_worker(hfdet, nel, G1, nBasis, .true., nExcitMemLen, &
                            nJ, iMaxExcit, nStore, ex_flag, 1, nEl)

        allocate(excit_gen(nExcitMemLen(1)), stat=ierr)
        if (ierr /= 0) call Stop_All(t_r, "Problem allocating excitation generator")
        excit_gen = 0

        call gensymexcitit2par_worker(hfdet, nel, G1, nBasis, .true., excit_gen, nJ, &
                            iMaxExcit, nStore, ex_flag, 1, nEl)

        if (tGUGA) then
            call stop_all("generate_sing_doub_determinants", &
                          "modify get_helement for GUGA")
        end if
        do while (.true.)
            call gensymexcitit2par_worker(hfdet, nel, G1, nBasis, .false., excit_gen, &
                                nJ, iExcit, nStore, ex_flag, 1, nEl)

            if (nJ(1) == 0) exit

            if (tStore) then
                call EncodeBitDet(nJ, ilut)
                ! If using a deterministic space connected to the Hartree-Fock
                ! then check that this determinant is actually connected to it!
                if (only_keep_conn) then
                    HEl = get_helement(hfdet, nJ, ilutHF, ilut)
                    if (abs(real(HEl, dp)) < 1.e-12_dp) cycle
                end if
                call add_state_to_space(ilut, ilut_list, space_size, nJ)
            end if

            if (iExcit == 1) then
                nSing = nSing + 1
            else if (iExcit == 2) then
                nDoub = nDoub + 1
            else
                call stop_all(t_r, "Trying to generate more than doubles!")
            end if
        end do

        tUseBrillouin = tTempUseBrill

    end subroutine enumerate_sing_doub_kpnt

    !subroutine generate_heisenberg_fci(ilut_list, space_size)

    !    ! In/Out: ilut_list - List of determinants generated.
    !    ! In/Out: space_size - Number of determinants in the generated space.
    !    !             If ilut_list is not empty on input and you want to keep
    !    !             the states already in it, then on input space_size should
    !    !             be equal to the number of states to be kept in ilut_list,
    !    !             and new states will be added in from space_size+1.
    !    !             Otherwise, space_size must equal 0 on input.
    !    !             On output space_size will equal the total number of
    !    !             generated plus what space_size was on input.

    !    use SystemData, only: nel, nbasis

    !    integer(n_int), intent(inout) :: ilut_list(0:,:)
    !    integer, intent(inout) :: space_size

    !    integer :: nsites, nup
    !    integer :: up_spins(nel/2+1)

    !    nsites = nbasis/2
    !    nup = nel/2
    !    call generate_heisenberg_fci_r(1, up_spins, nsites, nup, ilut_list, space_size)

    !end subroutine generate_heisenberg_fci

    !recursive subroutine generate_heisenberg_fci_r(ispin, up_spins, nsites, nup, ilut_list, space_size)

    !    use SystemData, only: nel

    !    integer, value :: ispin
    !    integer, intent(inout) :: up_spins(nel/2+1)
    !    integer, intent(in) :: nsites, nup
    !    integer(n_int), intent(inout) :: ilut_list(0:,:)
    !    integer, intent(inout) :: space_size

    !    integer :: i, isite, counter, starting_site
    !    integer :: alpha_ind, beta_ind, pos
    !    integer(n_int) :: ilut(0:NIfTot)

    !    starting_site = 1
    !    if (ispin > 1) starting_site = up_spins(ispin-1) + 1

    !    do isite = starting_site, nsites
    !        counter = 1
    !        ilut = 0_n_int
    !        up_spins(ispin) = isite
    !        ! If we're on the last spin.
    !        if (ispin == nup) then
    !            do i = 1, nsites
    !                ! If this site has been chosen to have an up spin on it.
    !                if (i == up_spins(counter)) then
    !                    alpha_ind = 2*i
    !                    pos = (alpha_ind - 1)/bits_n_int
    !                    ilut(pos) = ibset(ilut(pos), mod(alpha_ind-1, bits_n_int))
    !                    ! Consider the next up spin on the next loop.
    !                    counter = counter + 1
    !                else
    !                    beta_ind = 2*i-1
    !                    pos = (beta_ind - 1)/bits_n_int
    !                    ilut(pos) = ibset(ilut(pos), mod(beta_ind-1, bits_n_int))
    !                end if
    !            end do
    !            call add_state_to_space(ilut, ilut_list, space_size)
    !        else
    !            call generate_heisenberg_fci_r(ispin+1, up_spins, nsites, nup, ilut_list, space_size)
    !        end if
    !    end do
    !
    !end subroutine generate_heisenberg_fci_r

    subroutine generate_fci_core(ilut_list, space_size)

        ! In/Out: ilut_list - List of determinants generated.
        ! In/Out: space_size - Number of determinants in the generated space.
        !             If ilut_list is not empty on input and you want to keep
        !             the states already in it, then on input space_size should
        !             be equal to the number of states to be kept in ilut_list,
        !             and new states will be added in from space_size+1.
        !             Otherwise, space_size must equal 0 on input.
        !             On output, space_size will equal the total number of states
        !             generated plus its value on input.


        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size

        integer, allocatable :: nI_list(:, :)
        integer(n_int) :: ilut(0:NIfTot)
        integer :: temp(1, 1), hf_ind, ndets, i
        character(*), parameter :: t_r = "generate_fci_core"

        if (.not. tSymSet) call stop_all(t_r, "To use the 'FCI-CORE' option you must also choose the symmetry sector of the &
                                              &space to be generated by using the 'SYM' option.")

        if (.not. (tSpn .or. tGUGACore)) then
            call stop_all(t_r, "To use the FCI-CORE option you must fix the total spin on &
                               &z-axis, e.g., by using 'SPIN-RESTRICT' option for a SD &
                               &basis or 'GUGA' option for a GT basis.")
        end if

        ! Count the total number of determinants.
        call gndts(nel, nbasis, BRR, nBasisMax, temp, .true., G1, tSpn, lms, tParity, SymRestrict, ndets, hf_ind)
        allocate(nI_list(nel, ndets))
        if (size(ilut_list, 2) < ndets) then
            call stop_all(t_r, 'The ilut_list argument is too small. Probably SpawnedParts was allocated too small. &
                    &Please increase memoryfacspawn and memoryfacpart.')
        end if
        ! Generate and store all the determinants in nI_list.
        call gndts(nel, nbasis, BRR, nBasisMax, nI_list, .false., G1, tSpn, lms, tParity, SymRestrict, ndets, hf_ind)

        if (tGUGACore) then
            do i = 1, ndets
                call EncodeBitDet(nI_list(:, i), ilut)
                if (isProperCSF_flexible(ilut, STOT, nel)) call add_state_to_space(ilut, ilut_list, space_size, nI_list(:, i))
            end do
        else
            do i = 1, ndets
                call EncodeBitDet(nI_list(:, i), ilut)
                call add_state_to_space(ilut, ilut_list, space_size, nI_list(:, i))
            end do
        end if

    end subroutine generate_fci_core

    subroutine generate_all_conn_space(ilut_list, space_size)

        integer(n_int), intent(inout) :: ilut_list(0:, :)
        integer, intent(inout) :: space_size

        integer :: conn_size, conn_size_old, i, ierr
        integer(n_int), allocatable :: conn_space(:, :)
        integer(MPIArg) :: con_sendcounts(0:nProcessors - 1), con_recvcounts(0:nProcessors - 1)
        integer(MPIArg) :: con_senddispls(0:nProcessors - 1), con_recvdispls(0:nProcessors - 1)
        integer(MPIArg) :: SpawnedPartsWidth
        integer :: SpawnedPartsMax

        SpawnedPartsWidth = int(size(SpawnedParts, 1), MPIArg)
        SpawnedPartsMax = int(SpawnedPartsWidth) - 1
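        ! SpawnedPartsWidth is the number of integer(n_int) words stored per
        ! determinant; SpawnedPartsMax is the corresponding upper bound of the
        ! first array dimension, used when sizing conn_space below.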

        if (space_size > 0) then

            ! Find the states connected to the trial space. This typically takes a long time, so
            ! it is done in parallel by letting each processor find the states connected to a
            ! portion of the trial space.
            write(stdout, '("Calculating the number of states in the connected space...")'); call neci_flush(stdout)

            call generate_connected_space(space_size, ilut_list(0:SpawnedPartsMax, 1:space_size), conn_size)

            write(stdout, '("Attempting to allocate conn_space. Size =",1X,F12.3,1X,"Mb")') &
                real(conn_size, dp) * SpawnedPartsWidth * 7.629392e-06_dp; call neci_flush(stdout)
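            ! The factor 7.629392e-06 is approximately 8 / 1024**2, converting a count
            ! of (assumed 8-byte) integer(n_int) elements into Mb.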
            allocate(conn_space(0:SpawnedPartsMax, conn_size), stat=ierr)
            conn_space = 0_n_int

            write(stdout, '("States found on this processor, including repeats:",1X,i8)') conn_size

            write(stdout, '("Generating and storing the connected space...")'); call neci_flush(stdout)

            call generate_connected_space(space_size, ilut_list(0:SpawnedPartsMax, 1:space_size), &
                                          conn_size, conn_space)

            write(stdout, '("Removing repeated states and sorting by processor...")'); call neci_flush(stdout)

            call remove_repeated_states(conn_space, conn_size)

            call sort_space_by_proc(conn_space(:, 1:conn_size), conn_size, con_sendcounts)

        else

            conn_size = 0
            con_sendcounts = 0
            write(stdout, '("This processor will not search for connected states.")'); call neci_flush(stdout)
            ! Although the size is zero, conn_space must still be allocated because the
            ! rest of the code uses it; otherwise we get a segmentation fault later.
            allocate(conn_space(0:SpawnedPartsMax, conn_size), stat=ierr)

        end if

        write(stdout, '("States found on this processor, without repeats:",1X,i8)') conn_size; call neci_flush(stdout)

        write(stdout, '("Performing MPI communication of connected states...")'); call neci_flush(stdout)

        ! Send the connected states to their processors.
        ! con_sendcounts holds the number of states to send to other processors from this one.
        ! con_recvcounts will hold the number of states to be sent to this processor from the others.
        call MPIAlltoAll(con_sendcounts, 1, con_recvcounts, 1, ierr)
        conn_size_old = conn_size
        conn_size = sum(con_recvcounts)
        ! Scale the counts by the ilut width and compute the displacements needed for mpi_alltoallv.
        con_sendcounts = con_sendcounts * SpawnedPartsWidth
        con_recvcounts = con_recvcounts * SpawnedPartsWidth
        con_senddispls(0) = 0
        con_recvdispls(0) = 0
        do i = 1, nProcessors - 1
            con_senddispls(i) = con_senddispls(i - 1) + con_sendcounts(i - 1)
            con_recvdispls(i) = con_recvdispls(i - 1) + con_recvcounts(i - 1)
        end do

        !write(stdout,'("Attempting to allocate temp_space. Size =",1X,F12.3,1X,"Mb")') &
        !    real(conn_size,dp)*SpawnedPartsWidth*7.629392e-06_dp; call neci_flush(stdout)
        !allocate(temp_space(0:SpawnedPartsMax, conn_size), stat=ierr)

        call MPIAlltoAllV(conn_space(:, 1:conn_size_old), con_sendcounts, con_senddispls, &
                          ilut_list(:, 1:conn_size), con_recvcounts, con_recvdispls, ierr)

        space_size = conn_size

        if (allocated(conn_space)) then
            deallocate(conn_space, stat=ierr)
        end if
        !write(stdout,'("Attempting to allocate conn_space. Size =",1X,F12.3,1X,"Mb")') &
        !    real(conn_size,dp)*SpawnedPartsWidth*7.629392e-06_dp; call neci_flush(stdout)
        !allocate(conn_space(0:SpawnedPartsMax, 1:conn_size), stat=ierr)
        !conn_space = temp_space
        !deallocate(temp_space, stat=ierr)

    end subroutine generate_all_conn_space

    subroutine write_most_pop_core_at_end(target_space_size)

        ! Write the most populated states in CurrentDets to a DETFILE file,
        ! using the routine generate_space_most_populated, which is the same
        ! routine used by the pops-core semi-stochastic input option. So this
        ! routine basically generates a pops-core space, but can be used at the
        ! end of a calculation, rather than at the start.

        integer, intent(in) :: target_space_size
        integer :: i, j, k, ierr, iunit
        integer :: space_size
        logical :: texist
        character(*), parameter :: t_r = "write_most_pop_core_at_end"

        write(stdout, '(/,"Finding most populated states...")'); call neci_flush(stdout)

        space_size = 0

        ! Calling this routine will find the most populated states in
        ! CurrentDets and copy them across to SpawnedParts, to the first
        ! space_size slots in it (overwriting anything which was there before,
        ! which presumably won't be needed now).
        call generate_space_most_populated(target_space_size, .false., 0, &
                                           SpawnedParts(0:niftot, 1 : ), space_size, core_run)

        write(stdout, '("Writing the most populated states to DETFILE...")'); call neci_flush(stdout)

        iunit = get_free_unit()

        ! Let each process write its states to the file. Each process waits for
        ! the process before it to finish before starting.
        do i = 0, nProcessors - 1

            if (iProcIndex == i) then

                if (i == 0) then
                    open(iunit, file='DETFILE', status='replace')
                else
                    inquire (file='DETFILE', exist=texist)
                    if (.not. texist) call stop_all(t_r, '"DETFILE" file cannot be found')
                    open(iunit, file='DETFILE', status='old', position='append')
                end if

                do j = 1, space_size
                    do k = 0, nifd
                        write(iunit, '(i24)', advance='no') SpawnedParts(k, j)
                    end do
                    write(iunit, *)
                end do

                close(iunit)

            end if

            call MPIBarrier(ierr)

        end do

    end subroutine write_most_pop_core_at_end

!------------------------------------------------------------------------------------------!

    subroutine refresh_semistochastic_space()

        logical :: tStartedFromCoreGround

        ! The reinitialisation of the semi-stochastic space can affect the population
        ! because of stochastic rounding. To log this correctly, zero the iter_data counters here.
        iter_data_fciqmc%nborn = 0.0_dp
        iter_data_fciqmc%nremoved = 0.0_dp
        tStaticCore = .false.

        ! As the important determinants may change over time, this resets the
        ! semi-stochastic space, using the current population to generate a new one.
        call end_semistoch()
        ! the flag_deterministic flag has to be cleared from all determinants as it is
        ! assumed that no states have that flag when init_semi_stochastic starts
        call reset_core_space()
        ! Now, generate the new deterministic space
        call init_semi_stochastic(ss_space_in, tStartedFromCoreGround)

        ! Changing the semi-stochastic space can involve stochastic rounding
        ! if determinants with population < realSpawnCutoff leave the
        ! core space, so these events need to be logged.
        iter_data_fciqmc%update_growth = iter_data_fciqmc%update_growth + iter_data_fciqmc%nborn &
                                         - iter_data_fciqmc%nremoved

    end subroutine refresh_semistochastic_space

!------------------------------------------------------------------------------------------!

    subroutine reset_core_space()

        integer(int64) :: i

        do i = 1, TotWalkers
            call clr_flag_multi(CurrentDets(:, i), flag_deterministic)
        end do

    end subroutine reset_core_space

!------------------------------------------------------------------------------------------!

    subroutine init_var_space(rep)

        ! Initialize data needed for the determ-proj-approx-hamil option.
        ! This basically does a deterministic version of the simple-init
        ! option, i.e. it uses the full Hamiltonian in the main 'variational'
        ! space, and just the rectangular block of the Hamiltonian connecting
        ! the variational space to the connected space.

        ! To be able to set up this Hamiltonian, we need a way of checking
        ! whether a given state in the full deterministic space is also within
        ! the smaller variational space. To do this, we need a hash table for
        ! the variational space. The following sets that up.

        ! After this hash table (var_ht) has been created, the approximate
        ! Hamiltonian is generated.

        type(core_space_t), intent(in) :: rep
        integer :: i, ierr
        integer(MPIArg) :: mpi_temp

        allocate(var_sizes(0:nProcessors - 1))
        allocate(var_displs(0:nProcessors - 1))
        var_sizes = 0_MPIArg

        mpi_temp = var_size_this_proc
        call MPIAllGather(mpi_temp, var_sizes, ierr)

        var_space_size = sum(var_sizes)
        var_space_size_int = int(var_space_size)

        var_displs(0) = 0
        do i = 1, nProcessors - 1
            var_displs(i) = var_displs(i - 1) + var_sizes(i - 1)
        end do
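        ! var_displs(i) is now the offset of processor i's block of variational
        ! states within the gathered var_space array.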

        ! var_space is the variational space from all processes stuck together.
        allocate(var_space(0:NIfTot, var_space_size), stat=ierr)
        call MPIAllGatherV(temp_var_space(0:NIfTot, 1:var_sizes(iProcIndex)), &
                           var_space, var_sizes, var_displs)

        call initialise_shared_rht(var_space, var_space_size_int, var_ht)
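        ! The shared hash table var_ht maps variational-space determinants to their
        ! positions in var_space, so that membership of the variational space can be
        ! checked when building the approximate Hamiltonian.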

        write(stdout, '("Generating the approximate Hamiltonian...")'); call neci_flush(stdout)
        if (tHPHF) then
            call calc_approx_hamil_sparse_hphf(rep)
        else
            call stop_all("init_var_space", "Not implemented yet.")
        end if

    end subroutine init_var_space

end module semi_stoch_gen