k_space_hubbard.F90 Source File


Contents

Source Code


Source Code

#include "macros.h"

! finally write a specific routine for the k-space hubbard, where every
! necessary functionality for the k-space/momentum space hubbard is brought
! together. since i want to implement a transcorrelation factor for the
! k-space hubbard, which would necessitate a cumulative sum exciation creation
! i decided it is better to start a new branch, instead of hacking into the
! old branches of the code.
! in the end i also want to combine this fully with the lattice_mod implementation
! so i can easily decide which lattice to choose and then decide if we want to
! use a real or momentum space basis (and in the future maybe even wavelets)
module k_space_hubbard
    use SystemData, only: t_lattice_model, t_k_space_hubbard, t_trans_corr, &
                          trans_corr_param, t_trans_corr_2body, trans_corr_param_2body, &
                          nel, tHPHF, nOccBeta, nOccAlpha, nbasis, tLatticeGens, tHub, &
                          omega, bhub, nBasisMax, G1, BasisFN, NullBasisFn, TSPINPOLAR, &
                          treal, ttilt, tExch, ElecPairs, MaxABPairs, Symmetry, SymEq, &
                          t_new_real_space_hubbard, SymmetrySize, tNoBrillouin, tUseBrillouin, &
                          excit_cache, t_uniform_excits, brr, uhub, lms, t_mixed_excits, &
                          tGUGA, tgen_guga_mixed, tgen_guga_crude, t_approx_exchange, &
                          t_anti_periodic

    use lattice_mod, only: get_helement_lattice_ex_mat, get_helement_lattice_general, &
                           determine_optimal_time_step, lattice, sort_unique, lat, &
                           init_dispersion_rel_cache, &
                           epsilon_kvec

    use procedure_pointers, only: get_umat_el

    use constants, only: n_int, dp, EPS, bits_n_int, int64, maxExcit, stdout

    use bit_rep_data, only: NIfTot, nifd, nIfGUGA
    use bit_reps, only: decode_bit_det

    use DetBitOps, only: FindBitExcitLevel, EncodeBitDet, ilut_lt, ilut_gt, GetBitExcitation

    use real_space_hubbard, only: lat_tau_factor

    use fcimcdata, only: pDoubles, pParallel, excit_gen_store_type, pSingles

    use CalcData, only: pParallelIn, pSinglesIn, pDoublesIn

    use tau_main, only: tau, tau_search_method, possible_tau_search_methods, &
        assign_value_to_tau, min_tau, max_tau

    use dsfmt_interface, only: genrand_real2_dsfmt

    use util_mod, only: binary_search_first_ge, near_zero, &
                        operator(.isclose.), operator(.div.), clamp, stop_all

    use get_excit, only: make_double

    use OneEInts, only: GetTMatEl, tmat2d

    use sltcnd_mod, only: initSltCndPtr, sltcnd_excit

    use excitation_types, only: Excite_0_t

    use sym_mod, only: RoundSym, AddElecSym, SetupSym, lChkSym, mompbcsym, &
                       TotSymRep, GenMolpSymTable, SymProd, gensymstatepairs

    use SymExcitDataMod, only: KPointToBasisFn, ScratchSize, SpinOrbSymLabel, &
                               SymTableLabels, SymInvLabel, SymLabelList2, SymLabelCounts2, &
                               OrbClassCount, ktotal

    use sym_general_mod, only: ClassCountInd

    use sort_mod, only: sort

    use IntegralsData, only: UMat

    use global_utilities, only: LogMemDealloc, LogMemAlloc

    use SymData, only: nSymLabels, SymClasses, Symlabels

    use SymData, only: nSym, SymConjTab

    use SymData, only: tAbelian, SymTable

    use SymData, only: tagSymConjTab, tagSymClasses, tagSymLabels

    use SymData, only: tagSymTable

    use MPI_wrapper, only: iProcIndex, root

    use lattice_models_utils, only: make_ilutJ, get_ispn, get_orb_from_kpoints, &
                                    create_all_dets, find_minority_spin, &
                                    get_orb_from_kpoints_three, swap_excitations, &
                                    pick_spin_opp_elecs, pick_from_cum_list, &
                                    pick_spin_par_elecs, pick_three_opp_elecs

    use guga_main, only: generate_excitation_guga
    use guga_excitations, only: global_excitinfo, print_excitInfo
    use guga_matrixElements, only: calc_guga_matrix_element
    use guga_bitRepOps, only: convert_ilut_toGUGA, is_compatible, &
                              isProperCSF_ilut, current_csf_i, CSF_Info_t
    use guga_data, only: ExcitationInformation_t
    use excit_mod, only: GetExcitation, isvaliddet
    use hubbard_mod, only: calctmathub, setbasislim_hubtilt, setbasislim_hub

    better_implicit_none
    external :: calcmathub
    private
    public :: get_diag_helement_k_sp_hub, &
        init_three_body_const_mat, init_two_body_trancorr_fac_matrix, &
        init_get_helement_k_space_hub, setup_k_space_hub_sym, &
        init_tmat_kspace, setup_g1, setup_nbasismax, &
        setup_k_total, setup_kPointToBasisFn, setup_tmat_k_space, &
        get_offdiag_helement_k_sp_hub, get_helement_k_space_hub, &
        initialize_excit_table, get_3_body_helement_ks_hub, &
        check_momentum_sym, make_triple, two_body_transcorr_factor, &
        epsilon_kvec, three_body_transcorr_fac, same_spin_transcorr_factor, &
        get_helement_k_space_hub_general, get_helement_k_space_hub_ex_mat, &
        n_opp, three_body_prefac, create_ab_list_hubbard, &
        calc_pgen_k_space_hubbard, gen_parallel_double_hubbard, &
        gen_triple_hubbard, pick_a_orbital_hubbard, &
        pick_ab_orbitals_hubbard, pick_bc_orbitals_hubbard, &
        create_ab_list_par_hubbard, pick_ab_orbitals_par_hubbard, &
        create_bc_list_hubbard, calc_pgen_k_space_hubbard_transcorr, &
        calc_pgen_k_space_hubbard_par, calc_pgen_k_space_hubbard_triples, &
        gen_excit_k_space_hub, gen_excit_uniform_k_space_hub_test, &
        gen_excit_k_space_hub_transcorr_test, get_umat_kspace, &
        gen_excit_uniform_k_space_hub, init_k_space_hubbard, &
        gen_excit_k_space_hub_transcorr, gen_excit_mixed_k_space_hub_transcorr, &
        gen_excit_uniform_k_space_hub_transcorr, setup_symmetry_table, &
        get_2_body_diag_transcorr, get_3_body_diag_transcorr, calc_pgen_k_space_hubbard_uniform_transcorr
#ifndef CMPLX_
    public :: get_j_opt
#endif

    integer, parameter :: ABORT_EXCITATION = 0
    integer, parameter :: N_DIM = 3

    real(dp) :: three_body_prefac = 0.0_dp
    real(dp) :: n_opp(-1:1) = 0.0_dp

    ! temporary flag for the j optimization
    logical :: t_symmetric = .true.

    real(dp), allocatable :: two_body_transcorr_factor_matrix(:, :), &
                             three_body_const_mat(:, :, :)
    ! i especially need an interface for the matrix element calculation to
    ! implement the transcorrelated hamiltonian
    interface get_helement_k_space_hub
        module procedure get_helement_k_space_hub_ex_mat
        module procedure get_helement_k_space_hub_general
    end interface get_helement_k_space_hub

    interface get_one_body_diag
        module procedure get_one_body_diag_sym
        module procedure get_one_body_diag_kvec
    end interface get_one_body_diag

    interface same_spin_transcorr_factor
        module procedure same_spin_transcorr_factor_kvec
        module procedure same_spin_transcorr_factor_ksym
    end interface same_spin_transcorr_factor

    interface three_body_transcorr_fac
        module procedure three_body_transcorr_fac_kvec
        module procedure three_body_transcorr_fac_ksym
    end interface three_body_transcorr_fac

    interface two_body_transcorr_factor
        module procedure two_body_transcorr_factor_kvec
        module procedure two_body_transcorr_factor_ksym
    end interface two_body_transcorr_factor

    interface three_body_rpa_contrib
        module procedure rpa_contrib_ksym
        module procedure rpa_contrib_kvec
    end interface three_body_rpa_contrib

    interface two_body_contrib
        module procedure two_body_contrib_ksym
        module procedure two_body_contrib_kvec
    end interface two_body_contrib

    interface three_body_exchange_contrib
        module procedure exchange_contrib_ksym
        module procedure exchange_contrib_kvec
    end interface three_body_exchange_contrib

contains

    subroutine setup_symmetry_table()
        ! implement a new symmetry setup to decouple it from the
        ! old hubbard.F code..

        character(*), parameter :: this_routine = "setup_symmetry_table"

        integer :: i, j, k, l, k_i(3), ind, kmin(3), kmax(3)

        ! the only problem could be that we reorderd the orbitals already or?
        ! so G1 has a different ordering than just 1, nBasis/2...
        ! yes it really is reordered already! hm..
        ASSERT(associated(lat))
        ASSERT(associated(G1))

        nsym = nBasis / 2
        nSymLabels = nsym

        ! copy the output from the old hubbard code:
        write(stdout, "(A,I3,A)") "Generating abelian symmetry table with", &
            nsym, " generators for Hubbard momentum"
        if (allocated(SymLabels)) then
            write(stdout, '(a/a)') &
                'Warning: symmetry info already allocated.', &
                'Deallocating and reallocating.'
            deallocate(SymLabels)
            call LogMemDealloc(this_routine, tagSymLabels)
        end if
        allocate(SymLabels(nSym))
        call LogMemAlloc('SymLabels', nSym, SymmetrySize, this_routine, tagSymLabels)
        if (associated(SymClasses)) then
            deallocate(SymClasses)
            call LogMemDealloc(this_routine, tagSymClasses)
        end if
        ! [W.D]
        ! why is symclasses allocated to nBasis? it is actually only used
        ! and filled up to nBasis/2, also in the rest of the code..
        allocate(SymClasses(nBasis / 2))
        call LogMemAlloc('SymClasses', nBasis, 4, this_routine, tagSymClasses)

        ! for some strange reason the (0,0,0) k-vector is treated
        ! special in the old implementation.. since it is its own
        ! symmetry inverse.. but this might be not the case in the
        ! general lattices or? there can be other states which are
        ! also its own inverse or?

        ! so try to change that here..
        ! ok i should still treat the gamma point special and set its
        ! sym label to 1 and maybe also order the symmetries by energy?

        if (allocated(SymTable)) then
            deallocate(SymTable)
            call LogMemDealloc(this_routine, tagSymTable)
        end if
        allocate(SymTable(nSym, nSym))
        call LogMemAlloc('SymTable', nSym**2, SymmetrySize, this_routine, tagSymTable)
        if (allocated(SymConjTab)) then
            deallocate(SymConjTab)
            call LogMemDealloc(this_routine, tagSymConjTab)
        end if
        allocate(SymConjTab(nSym))
        call LogMemAlloc('SymConjTable', nSym, 4, this_routine, tagSymConjTab)

        SYMTABLE = Symmetry(0)

        tAbelian = .false.

        ! also setup the stuff contained in the lattice class.
        ASSERT(associated(lat))

        if (allocated(lat%k_to_sym)) deallocate(lat%k_to_sym)
        if (allocated(lat%sym_to_k)) deallocate(lat%sym_to_k)
        if (allocated(lat%mult_table)) deallocate(lat%mult_table)
        if (allocated(lat%inv_table)) deallocate(lat%inv_table)

        allocate(lat%sym_to_k(lat%get_nsites(), 3))
        allocate(lat%mult_table(lat%get_nsites(), lat%get_nsites()))
        allocate(lat%inv_table(lat%get_nsites()))

        ! i have to setup the symlabels first ofc..
        do i = 1, lat%get_nsites()
            ind = get_spatial(brr(2 * i))
            SymClasses(ind) = i
            ! and also just encode the symmetry labels as integers, instead of
            ! 2^(k-1), to be able to treat more than 64 orbitals (in the old
            ! implementation, an integer overflow happened in this case!)
            SymLabels(ind)%s = i
            ! i also need it in G1:
            ! is G1 already ordered by energy??

            call lat%set_sym(ind, i)

        end do

        kmin = 0
        kmax = 0
        do i = 1, lat%get_nsites()
            k_i = lat%get_k_vec(i)
            do j = 1, lat%get_ndim()
                if (k_i(j) < kmin(j)) kmin(j) = k_i(j)
                if (k_i(j) > kmax(j)) kmax(j) = k_i(j)
            end do
        end do

        allocate(lat%k_to_sym(kmin(1):kmax(1), kmin(2):kmax(2), kmin(3):kmax(3)))

        lat%k_to_sym = 0

        ! now find the inverses:
        do i = 1, lat%get_nsites()
            G1(2 * i - 1)%Sym = SymLabels(i)
            G1(2 * i)%Sym = SymLabels(i)

            ! find the orbital of -k
            j = lat%get_orb_from_k_vec(-lat%get_k_vec(i))

            ! since i have a linear encoding of the symmetries i do not need
            ! to use SymClasses here..
            SymConjTab(SymClasses(i)) = SymClasses(j)

            lat%inv_table(SymClasses(i)) = SymClasses(j)
            ! maybe also store the k_inverse of a orbital..

            lat%sym_to_k(SymClasses(i), :) = lat%get_k_vec(i)

            k_i = lat%get_k_vec(i)
            lat%k_to_sym(k_i(1), k_i(2), k_i(3)) = SymClasses(i)

            ! and create the symmetry product of (i) with every other symmetry
            do k = 1, lat%get_nsites()
                ! i just have to add the momenta and map it to the first BZ

                l = lat%get_orb_from_k_vec(lat%get_k_vec(i) + lat%get_k_vec(k))

                SymTable(SymClasses(i), SymClasses(k)) = SymLabels(l)

                lat%mult_table(SymClasses(i), SymClasses(k)) = SymClasses(l)

            end do
        end do
#ifdef DEBUG_
        write(stdout, *) "Symmetry, Symmetry Conjugate"
        do i = 1, lat%get_nsites()
            print *, i, SymConjTab(i)
        end do

        print *, "lat%sym_to_k: "
        do i = 1, lat%get_nsites()
            print *, i, "|", SymClasses(i), "|", lat%sym_to_k(i, :)
        end do

        print *, "lat%inv_table: "
        do i = 1, lat%get_nsites()
            print *, i, "|", SymClasses(i), "|", lat%inv_table(i)
        end do

        print *, "lat%mult_table: "
        do i = 1, lat%get_nsites()
            print *, lat%mult_table(i, :)
        end do

        print *, "lat%k_to_sym: "
        do i = 1, lat%get_nsites()
            k_i = lat%get_k_vec(i)
            print *, k_i, "|", lat%k_to_sym(k_i(1), k_i(2), k_i(3))
        end do

#endif

    end subroutine setup_symmetry_table

    pure function get_umat_kspace(i, j, k, l) result(hel)
        ! simplify this get_umat function for the k-space hubbard..
        ! since there was a lot of unnecessary stuff going on in the other
        ! essentially we only have to check if the momenta involved
        ! fullfil k_k + k_l = k_i + k_j
        integer, intent(in) :: i, j, k, l
        HElement_t(dp) :: hel

        ! or just use the symtable information?
        ! do i have to access this with the symmetry label or the orbital
        ! number?? i think i need the symmetry labels..
        ! i could set this on the site level!
        if (SymTable(lat%get_sym(i), lat%get_sym(j))%S == &
            SymTable(lat%get_sym(k), lat%get_sym(l))%S) then
            hel = Umat(1)
        else
            hel = 0.0_dp
        end if
    end function get_umat_kspace

    subroutine init_k_space_hubbard()

        character(*), parameter :: this_routine = "init_k_space_hubbard"
        real(dp) :: tau_opt

        if (iProcIndex == root) then
            print *, " new k-space hubbard implementation init:"
        end if

        ! i have to set the incorrect excitaiton generator flags to false
        tLatticeGens = .false.
        ! maybe i also need to turn off the hubbard keyword.. at this
        ! point
        thub = .false.

        tExch = .true.
        tNoBrillouin = .false.
        tUseBrillouin = .true.

        call check_k_space_hubbard_input()

        get_umat_el => get_umat_kspace

        call init_get_helement_k_space_hub()

        if (t_mixed_excits .and. .not. t_trans_corr_2body) then
            root_print "WARNING: mixed excit gen chosen, but no transcorrelation"
            root_print "    use uniform for doubles!"
            t_uniform_excits = .true.
        end if
        tau_opt = determine_optimal_time_step()

        if (tau < EPS) then
            call assign_value_to_tau(&
                clamp(lat_tau_factor * tau_opt, min_tau, max_tau), &
                'Initialization with optimal tau value')
        else
            if (iProcIndex == root) then
                print *, "optimal time-step would be: ", tau_opt
                print *, "but tau specified in input!"
            end if
        end if

        ! [W.D. 7.3.2018]
        ! re-enable the tau-search and histogramming for the k-space
        ! hubbard model with transcorrelation
        ! since there the matrix elements are not equal for every
        ! excitation anymore and for the Gutzwiller correlation factor
        ! we also have additional pTriples and pParallel variables, which
        ! we might want to adapt on-the-fly as in the ab-initio calculations
        ! and I also want to check if i got the excitation weighting correct
        ! in the transcorrelated case or if i messed it up due to the
        ! non-hermitian character of the hamiltonian
        if (.not. (t_trans_corr_2body .or. t_trans_corr .or. tGUGA)) then
            if (tau_search_method /= possible_tau_search_methods%OFF) then
                call stop_all(this_routine, "tau-search should be switched off")
            end if
        end if

        if (associated(lat)) then
            call setup_nbasismax(lat)
            call setup_g1(lat)
            call setup_tmat_k_space(lat)
            call setup_kPointToBasisFn(lat)
        end if

        if (t_trans_corr_2body) then
            if (tHPHF) then
                call Stop_All(this_routine, "not yet implemented with HPHF")
            end if

            three_body_prefac = real(bhub, dp) * 2.0_dp * (cosh(trans_corr_param_2body) - 1.0_dp) / real(omega**2, dp)
            ! i also have to set some generation probability parameters..

            ! use pSingles for triples!
            ! BE CAREFUL and dont get confused!
            ! @Werner: does this make sense?
            if (allocated(pSinglesIn) .and. allocated(pDoublesIn)) then
                if (.not. (pSinglesIn + pDoublesIn .isclose. 1.0_dp)) then
                    call stop_all(this_routine, "pSinglesIn + pDoublesIn /= 1.0!")
                else
                    pSingles = pSinglesIn
                    pDoubles = pDoublesIn
                end if
            else if (allocated(pSinglesIn) .and. (.not. allocated(pDoublesIn))) then
                pSingles = pSinglesIn
                pDoubles = 1.0_dp - pSingles
            else if (allocated(pDoublesIn) .and. (.not. allocated(pSinglesIn))) then
                pDoubles = pDoublesIn
                pSingles = 1.0_dp - pDoubles
            else
                pDoubles = 0.8_dp
                pSingles = 1.0_dp - pDoubles
            end if

            if (allocated(pParallelIn)) then
                pParallel = pParallelIn
            else
                pParallel = 0.5_dp
            end if

        end if

        if (.not. (t_trans_corr_2body .or. t_trans_corr)) then
            call initialize_excit_table()
        end if

    end subroutine init_k_space_hubbard

    subroutine initialize_excit_table()
        ! This cannot be a member of the lattice class because that would introduce
        ! a circulat dependency on get_offdiag_helement_k_sp_hub
        integer :: a, b, i, j, ex(2, 2)
        ! nI is not needed anywhere, it is a redundant argument in the k_space_hubbard module
        integer :: nI(2)
        real(dp) :: matEL

        ! the buffer for excitations is (number of states)^3
        ! strictly speaking, we do not need to distinguish spin orbs here for simple hubbard
        ! but for transcorrelated, it might matter
        if (allocated(excit_cache)) deallocate(excit_cache)

        allocate(excit_cache(nbasis, nbasis, nbasis))
        ! loop over all pairs of orbitals
        do i = 1, nbasis
            do j = 1, nbasis
                nI = [i, j]
                ex(1, :) = [i, j]
                ! and for each, check all possible excitations
                do a = 1, nbasis
                    matEl = 0.0_dp
                    if (a /= i .and. a /= j) then
                        b = get_orb_from_kpoints(i, j, a)
                        if (b /= a .and. b /= i .and. b /= j) then
                            ex(2, :) = [a, b]
                            matEl = abs(get_offdiag_helement_k_sp_hub(nI, ex, .false.))
                        end if
                    end if
                    ! most excitations will have a finite amplitude, so store all
                    excit_cache(i, j, a) = matEl
                end do
            end do
        end do

    end subroutine initialize_excit_table

    subroutine check_k_space_hubbard_input()
        character(*), parameter :: this_routine = "check_k_space_hubbard_input"

        if (iProcIndex == root) then
            print *, "checking input for k-space hubbard:"
            if (any(t_anti_periodic)) &
                call stop_all(this_routine, "anti-periodic BCs not implemented for k-space Hubbard")
            !todo: find the incompatible input and abort here!
            print *, "input is fine!"
        end if

    end subroutine check_k_space_hubbard_input

    subroutine gen_excit_k_space_hub(nI, ilutI, nJ, ilutJ, exFlag, ic, &
                                     ex, tParity, pGen, hel, store, run)
        !! An API interfacing function for generate_excitation to the rest of NECI:
        !!
        !! Requires guga_bitRepOps::current_csf_i to be set according to the ilutI.
        integer, intent(in) :: nI(nel), exFlag
        integer(n_int), intent(in) :: ilutI(0:NIfTot)
        integer, intent(out) :: nJ(nel), ic, ex(2, maxExcit)
        integer(n_int), intent(out) :: ilutJ(0:NifTot)
        real(dp), intent(out) :: pGen
        logical, intent(out) :: tParity
        HElement_t(dp), intent(out) :: hel
        type(excit_gen_store_type), intent(inout), target :: store
        integer, intent(in), optional :: run
#ifdef DEBUG_
        character(*), parameter :: this_routine = "gen_excit_k_space_hub"
#endif
        real(dp) :: p_elec, p_orb
        integer :: elecs(2), orbs(2), src(2)
        type(ExcitationInformation_t) :: excitInfo
        integer(n_int) :: ilutGj(0:nifguga)

        unused_var(exFlag); unused_var(store); unused_var(run)

        hel = h_cast(0.0_dp)
        ic = 0
        pgen = 0.0_dp

        ! i first have to choose an electron pair (ij) at random
        ! but with the condition that they have to have opposite spin!
        call pick_spin_opp_elecs(nI, elecs, p_elec)

        src = nI(elecs)

        call pick_ab_orbitals_hubbard(nI, ilutI, src, orbs, p_orb)

        if (orbs(1) == ABORT_EXCITATION) then
            nJ(1) = ABORT_EXCITATION
            pgen = 0.0_dp
            return
        end if

        ic = 2

        ! and make the excitation
        call make_double(nI, nJ, elecs(1), elecs(2), orbs(1), orbs(2), ex, tParity)

        ilutJ = make_ilutJ(ilutI, ex, 2)

        ! i think in both the electrons and the orbitals i have twice the
        ! probability to pick them
        ! already modified in the orbital picker..
        pgen = p_elec * p_orb

        ! try implementing the crude guga excitation approximation via the
        ! determinant excitation generator
        if (tGen_guga_crude) then

            call convert_ilut_toGUGA(ilutJ, ilutGj)

            if (.not. isProperCSF_ilut(ilutGJ, .true.)) then
                nJ(1) = 0
                pgen = 0.0_dp
                return
            end if

            ASSERT(is_compatible(ilutI, current_csf_i))
            call calc_guga_matrix_element(ilutI, current_csf_i, ilutJ, CSF_Info_t(ilutJ), excitInfo, hel, .true.)

            if (abs(hel) < EPS) then
                nJ(1) = 0
                pgen = 0.0_dp
            end if

            global_excitinfo = excitInfo

            return
        end if

#ifdef DEBUG_
        if (.not. isvaliddet(nI, nel)) then
            if (nJ(1) /= 0) then
                print *, "nI: ", nI
                print *, "nJ: ", nJ
                print *, "src: ", ex(1, :)
                print *, "tgt: ", ex(2, :)
            end if
        end if
#endif

    end subroutine gen_excit_k_space_hub

    subroutine gen_excit_uniform_k_space_hub(nI, ilutI, nJ, ilutJ, exFlag, ic, ex, &
                                             tParity, pGen, hel, store, run)
        integer, intent(in) :: nI(nel), exFlag
        integer(n_int), intent(in) :: ilutI(0:NIfTot)
        integer, intent(out) :: nJ(nel), ic, ex(2, maxExcit)
        real(dp), intent(out) :: pGen
        logical, intent(out) :: tParity
        type(excit_gen_store_type), intent(inout), target :: store
        integer, intent(in), optional :: run

        ! not used
        HElement_t(dp), intent(out) :: hel
        integer(n_int), intent(out) :: ilutJ(0:NifTot)

        real(dp) :: p_elec, r
        integer :: a, b, i, elecs(2)
        integer, parameter :: maxTrials = 1000

        unused_var(store)
        unused_var(exFlag)

        if (present(run)) then
            unused_var(run)
        end if

        hel = h_cast(0.0_dp)

        ilutJ = 0
        ic = 0

        nJ = nI

        ! first, get two electrons
        call pick_spin_opp_elecs(nI, elecs, p_elec)

        ! uniform random excit gen probability
        pGen = 1.0_dp / (nbasis - nel) * 2.0_dp / (nOccAlpha * nOccBeta)

        ! try finding an allowed excitation
        do i = 1, maxTrials
            ! we do this by picking random momenta and checking if the targets are
            ! empty
            r = genrand_real2_dSFMT()
            ! this is our random orb
            a = INT(nBasis * r) + 1
            ! only empty targets are of interest
            if (IsOcc(ilutI, a)) cycle

            ! now, get the missing momentum
            b = get_orb_from_kpoints(nI(elecs(1)), nI(elecs(2)), a)
            ! and check if its empty and differs from a
            if (IsOcc(ilutI, b) .or. a == b) then
                ! if not, the excitation is rejected (!)
                nJ(1) = 0
                return
            end if

            call make_double(nI, nJ, elecs(1), elecs(2), a, b, ex, tParity)
            ic = 2
            exit
        end do

    end subroutine gen_excit_uniform_k_space_hub

    subroutine gen_excit_uniform_k_space_hub_transcorr(nI, ilutI, nJ, ilutJ, &
                                                       exFlag, ic, ex, tParity, pgen, hel, store, run)
        integer, intent(in) :: nI(nel), exFlag
        integer(n_int), intent(in) :: ilutI(0:NIfTot)
        integer, intent(out) :: nJ(nel), ic, ex(2, maxExcit)
        integer(n_int), intent(out) :: ilutJ(0:NIfTot)
        real(dp), intent(out) :: pgen
        logical, intent(out) :: tParity
        HElement_t(dp), intent(out) :: hel
        type(excit_gen_store_type), intent(inout), target :: store
        integer, intent(in), optional :: run
#ifdef DEBUG_
        character(*), parameter :: this_routine = "gen_excit_uniform_k_space_hub_transcorr"
#endif

        unused_var(exFlag)
        unused_var(store)
        if (present(run)) then
            unused_var(run)
        end if

        hel = h_cast(0.0_dp)
        ilutJ = 0_n_int

        ic = 0
        nJ(1) = 0

        ! try a change that we do the doubles always in a uniform way
        ! and only weight the triples.. since the triples are not so
        ! expensive, but they are decisive in the time-step ratio!
        ! especially for larger U and bigger lattices!
        if (genrand_real2_dsfmt() < pDoubles) then

            ! double excitation
            ic = 2

            if (genrand_real2_dsfmt() < pParallel) then

                call gen_uniform_double_para(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)

                pgen = pgen * pDoubles * pParallel

            else

                call gen_uniform_double_anti(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)

                pgen = pgen * pDoubles * (1.0_dp - pParallel)

            end if

        else
            ! triple excitation
            ic = 3

            call gen_uniform_triple(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)

            pgen = pgen * (1.0_dp - pDoubles)

        end if

#ifdef DEBUG_
        if (nJ(1) /= 0) then
            if (abs(pgen - calc_pgen_k_space_hubbard_uniform_transcorr(ex, ic)) > EPS) then
                print *, "nI: ", nI
                print *, "nJ: ", nJ
                print *, "ic: ", ic
                print *, "calc. pgen: ", calc_pgen_k_space_hubbard_uniform_transcorr(ex, ic)
                print *, "prd. pgen: ", pgen
                call stop_all(this_routine, "pgens wrong!")
            end if
        end if
#endif

    end subroutine gen_excit_uniform_k_space_hub_transcorr

    subroutine gen_excit_mixed_k_space_hub_transcorr(nI, ilutI, nJ, ilutJ, &
                                                     exFlag, ic, ex, tParity, pgen, hel, store, run)
        ! excitation generator, which makes doubles uniform and triples in a
        ! weighted fashion to reduce cost, but at the same time increase
        ! the time-step and the sampling
        integer, intent(in) :: nI(nel), exFlag
        integer(n_int), intent(in) :: ilutI(0:NIfTot)
        integer, intent(out) :: nJ(nel), ic, ex(2, 3)
        integer(n_int), intent(out) :: ilutJ(0:NIfTot)
        real(dp), intent(out) :: pgen
        logical, intent(out) :: tParity
        HElement_t(dp), intent(out) :: hel
        type(excit_gen_store_type), intent(inout), target :: store
        integer, intent(in), optional :: run
#ifdef DEBUG_
        character(*), parameter :: this_routine = "gen_excit_mixed_k_space_hub_transcorr"
#endif

        unused_var(exFlag)
        unused_var(store)
        unused_var(run)

        hel = h_cast(0.0_dp)
        ilutJ = 0_n_int
        ic = 0
        nJ(1) = 0

        if (genrand_real2_dsfmt() < pDoubles) then

            ! double excitation
            ic = 2

            if (genrand_real2_dsfmt() < pParallel) then

                call gen_uniform_double_para(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)

                pgen = pgen * pDoubles * pParallel

            else

                call gen_uniform_double_anti(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)

                pgen = pgen * pDoubles * (1.0_dp - pParallel)

            end if

        else
            ! triple excitation
            ic = 3
            call gen_triple_hubbard(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
            pgen = pgen * (1.0_dp - pDoubles)

        end if

#ifdef DEBUG_
        if (nJ(1) /= 0) then
            if (abs(pgen - calc_pgen_mixed_k_space_hub_transcorr(nI, ilutI, ex, ic)) > EPS) then
                print *, "nI: ", nI
                print *, "nJ: ", nJ
                print *, "ic: ", ic
                print *, "calc. pgen: ", calc_pgen_mixed_k_space_hub_transcorr(nI, ilutI, ex, ic)
                print *, "prd. pgen: ", pgen
                call stop_all(this_routine, "pgens wrong!")
            end if
        end if
#endif

    end subroutine gen_excit_mixed_k_space_hub_transcorr

    function calc_pgen_mixed_k_space_hub_transcorr(nI, ilutI, ex, ic) result(pgen)
        integer, intent(in) :: nI(nel), ex(:, :), ic
        integer(n_int), intent(in) :: ilutI(0:niftot)
        real(dp) :: pgen
        real(dp) :: p_elec, p_orb

        if (ic == 2) then

            pgen = pDoubles

            if (same_spin(ex(1, 1), ex(1, 2))) then
                pgen = pgen * pParallel

                if (is_beta(ex(1, 1))) then
                    p_elec = 1.0_dp / real(nOccBeta * (nOccBeta - 1), dp)
                    p_orb = 2.0_dp / real(nbasis / 2 - nOccBeta, dp)
                else
                    p_elec = 1.0_dp / real(nOccAlpha * (nOccAlpha - 1), dp)
                    p_orb = 2.0_dp / real(nbasis / 2 - nOccAlpha, dp)
                end if

            else
                pgen = pgen * (1.0_dp - pParallel)
                p_elec = 1.0_dp / real(nOccBeta * nOccAlpha, dp)

                p_orb = 2.0_dp / real(nbasis - nel, dp)

            end if

            pgen = pgen * p_elec * p_orb

        else

            pgen = calc_pgen_k_space_hubbard_triples(nI, ilutI, ex, ic)
            pgen = pgen * (1.0_dp - pDoubles) * 2.0_dp

        end if

    end function calc_pgen_mixed_k_space_hub_transcorr

    subroutine gen_uniform_double_para(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
        ! routine to do a uniform paralles spin double excitation
        integer, intent(in) :: nI(nel)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: nJ(nel), ex(2, 3)
        integer(n_int), intent(out) :: ilutJ(0:niftot)
        logical, intent(out) :: tParity
        real(dp), intent(out) :: pgen
#ifdef DEBUG_
        character(*), parameter :: this_routine = "gen_uniform_double_para"
#endif
        integer :: elecs(2), ispn, spin, i, a, b
        integer, parameter :: max_trials = 1000
        real(dp) :: p_elec, p_orb

        nJ(1) = 0
        ! pick two spin-parallel electrons
        call pick_spin_par_elecs(nI, elecs, p_elec, ispn)

        ! i have to figure out the probabilty of two spin-parallel
        if (ispn == 1) then
            spin = 1
            p_orb = 1.0_dp / real(nbasis / 2 - nOccBeta, dp)
        else if (ispn == 3) then
            spin = 2
            p_orb = 1.0_dp / real(nbasis / 2 - nOccAlpha, dp)
#ifdef DEBUG_
        else
            call stop_all(this_routine, "no parallel spin!")
#endif
        end if

        ! pick 2 holes now
        do i = 1, max_trials

            a = 2 * int(nbasis / 2 * genrand_real2_dsfmt()) + spin

            if (IsOcc(ilutI, a)) cycle

            b = get_orb_from_kpoints(nI(elecs(1)), nI(elecs(2)), a)

            ! do we have to reject or can we cycle if not fitting?
            ! a == b test has to be here for the spin-parallel
            ! excitations!
            ! try not returning but cycling
            if (IsOcc(ilutI, b) .or. a == b) then
                nJ(1) = 0
                return
            end if

            call make_double(nI, nJ, elecs(1), elecs(2), a, b, ex, tParity)

            ilutJ = make_ilutJ(ilutI, ex, 2)
            exit
        end do

        ! times 2, since both ab, ba orders are possible
        pgen = p_elec * p_orb * 2.0_dp

    end subroutine gen_uniform_double_para

    subroutine gen_uniform_double_anti(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
        ! routine to do a uniform anti-parallel spin double excitation
        integer, intent(in) :: nI(nel)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: nJ(nel), ex(2, 3)
        integer(n_int), intent(out) :: ilutJ(0:niftot)
        logical, intent(out) :: tParity
        real(dp), intent(out) :: pgen
        integer :: a, b, i, elecs(2)
        integer, parameter :: max_trials = 1000
        real(dp) :: p_elec, p_orb

        call pick_spin_opp_elecs(nI, elecs, p_elec)

        p_orb = 2.0_dp / real(nbasis - nel, dp)

        nJ(1) = 0

        ! pick 2 holes now
        do i = 1, max_trials

            a = int(nbasis * genrand_real2_dsfmt()) + 1

            if (IsOcc(ilutI, a)) cycle

            b = get_orb_from_kpoints(nI(elecs(1)), nI(elecs(2)), a)

            ! do we have to reject or can we cycle if not fitting?
            ! a == b test has to be here for the spin-parallel
            ! excitations!
            if (IsOcc(ilutI, b) .or. a == b) then
                nJ(1) = 0
                return
            end if

            call make_double(nI, nJ, elecs(1), elecs(2), a, b, ex, tParity)

            ilutJ = make_ilutJ(ilutI, ex, 2)
            exit
        end do

        pgen = p_elec * p_orb

    end subroutine gen_uniform_double_anti

    subroutine gen_uniform_triple(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
        ! routine to do a uniform triple excitation
        integer, intent(in) :: nI(nel)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: nJ(nel), ex(2, 3)
        integer(n_int), intent(out) :: ilutJ(0:niftot)
        logical, intent(out) :: tParity
        real(dp), intent(out) :: pgen
#ifdef DEBUG_
        character(*), parameter :: this_routine = "gen_uniform_triple"
#endif
        integer :: a, b, c, elecs(3), i, ispn, src(3), sum_ms
        integer, parameter :: max_trials = 1000
        real(dp) :: p_elec, p_orb, p_orb_a

        nJ(1) = 0

        call pick_three_opp_elecs(nI, elecs, p_elec, sum_ms)
        src = nI(elecs)

        ASSERT(sum_ms == -1 .or. sum_ms == 1)

        call pick_a_orbital_hubbard(ilutI, a, p_orb_a, sum_ms)

        ! and now i have to pick orbital b and fitting c in a uniform
        ! way.. i hope this still works with the probabilities
        ! if A is beta, we need to pick a alpha B uniformly and vv.
        if (is_beta(a)) then
            p_orb = 1.0_dp / real(nbasis / 2 - nOccAlpha, dp)
            ! also use a spin to specify the spin-orbital
            ! is a is beta we want an alpha -> so add +1
            ispn = 0
        else
            p_orb = 1.0_dp / real(nBasis / 2 - nOccBeta, dp)
            ispn = 1
        end if

        do i = 1, max_trials

            b = 2 * (1 + int(genrand_real2_dsfmt() * nbasis / 2)) - ispn

            if (IsOcc(ilutI, b)) cycle

            c = get_orb_from_kpoints_three(src, a, b)

            if (IsOcc(ilutI, c) .or. b == c) then
                nJ(1) = 0
                return
            end if

            call make_triple(nI, nJ, elecs, [a, b, c], ex, tParity)

            ilutJ = make_ilutJ(ilutI, ex, 3)
            exit
        end do

        ! times 2 since BC <> CB is both possible
        pgen = p_elec * p_orb * p_orb_a * 2.0_dp

    end subroutine gen_uniform_triple

    subroutine gen_excit_k_space_hub_transcorr(nI, ilutI, nJ, ilutJ, exFlag, ic, &
                                               ex, tParity, pGen, hel, store, run)

        integer, intent(in) :: nI(nel), exFlag
        integer(n_int), intent(in) :: ilutI(0:NIfTot)
        integer, intent(out) :: nJ(nel), ic
        integer, intent(out) :: ex(2, maxExcit)
        integer(n_int), intent(out) :: ilutJ(0:NifTot)
        real(dp), intent(out) :: pGen
        logical, intent(out) :: tParity
        HElement_t(dp), intent(out) :: hel
        type(excit_gen_store_type), intent(inout), target :: store
        integer, intent(in), optional :: run

        if (genrand_real2_dsfmt() < pDoubles) then
            if (genrand_real2_dsfmt() < pParallel) then
                ! do a parallel triple excitation, coming from the triples..
                call gen_parallel_double_hubbard(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
                ic = 2
                pgen = pgen * pDoubles * pParallel

            else
                ! do a "normal" hubbard k-space excitation
                call gen_excit_k_space_hub(nI, ilutI, nJ, ilutJ, exFlag, ic, &
                                           ex, tParity, pGen, hel, store, run)

                pgen = pgen * pDoubles * (1.0_dp - pParallel)

            end if
        else
            ! otherwise to a triple..
            ic = 3

            call gen_triple_hubbard(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
            pgen = pgen * (1.0_dp - pDoubles)

        end if

    end subroutine gen_excit_k_space_hub_transcorr

    ! make an exact copy of the transcorrelation excitation generator to
    ! run the stochastic test driver on it! so it must have the same
    ! interface as the other excitation generators!
    subroutine gen_excit_uniform_k_space_hub_test(nI, ilutI, nJ, ilutJ, exFlag, ic, &
                                                  ex, tParity, pGen, hel, store, run)

        integer, intent(in) :: nI(nel), exFlag
        integer(n_int), intent(in) :: ilutI(0:NIfTot)
        integer, intent(out) :: nJ(nel), ic
        integer, intent(out) :: ex(2, maxExcit)
        integer(n_int), intent(out) :: ilutJ(0:NifTot)
        real(dp), intent(out) :: pGen
        logical, intent(out) :: tParity
        HElement_t(dp), intent(out) :: hel
        type(excit_gen_store_type), intent(inout), target :: store
        integer, intent(in), optional :: run
        integer :: elecs(3), temp_ex(2, 3)

        unused_var(exFlag)
        unused_var(store)
        unused_var(run)

        hel = h_cast(0.0_dp)
        ilutJ = 0_n_int
        ic = 0
        nJ(1) = 0
        elecs = 0
        ex = 0

        if (genrand_real2_dsfmt() < pDoubles) then

            ! double excitation
            ic = 2

            if (genrand_real2_dsfmt() < pParallel) then

                call gen_uniform_double_para(nI, ilutI, nJ, ilutJ, temp_ex, tParity, pgen)

                pgen = pgen * pDoubles * pParallel

            else

                call gen_uniform_double_anti(nI, ilutI, nJ, ilutJ, temp_ex, tParity, pgen)

                pgen = pgen * pDoubles * (1.0_dp - pParallel)

            end if
        else
            ! triple excitation
            ic = 3

            call gen_uniform_triple(nI, ilutI, nJ, ilutJ, temp_ex, tParity, pgen)

            pgen = pgen * (1.0_dp - pDoubles)

        end if

    end subroutine gen_excit_uniform_k_space_hub_test

    ! make an exact copy of the transcorrelation excitation generator to
    ! run the stochastic test driver on it! so it must have the same
    ! interface as the other excitation generators!
    subroutine gen_excit_k_space_hub_transcorr_test(nI, ilutI, nJ, ilutJ, exFlag, ic, &
                                                    ex, tParity, pGen, hel, store, run)

        integer, intent(in) :: nI(nel), exFlag
        integer(n_int), intent(in) :: ilutI(0:NIfTot)
        integer, intent(out) :: nJ(nel), ic
        integer, intent(out) :: ex(2, maxExcit)
        integer(n_int), intent(out) :: ilutJ(0:NifTot)
        real(dp), intent(out) :: pGen
        logical, intent(out) :: tParity
        HElement_t(dp), intent(out) :: hel
        type(excit_gen_store_type), intent(inout), target :: store
        integer, intent(in), optional :: run
        integer :: temp_ex(2, 3)

        if (genrand_real2_dsfmt() < pDoubles) then
            if (genrand_real2_dsfmt() < pParallel) then
                ! do a parallel triple excitation, coming from the triples..
                call gen_parallel_double_hubbard(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
                ic = 2
                pgen = pgen * pDoubles * pParallel
            else
                ! do a "normal" hubbard k-space excitation
                call gen_excit_k_space_hub(nI, ilutI, nJ, ilutJ, exFlag, ic, &
                                           ex, tParity, pGen, hel, store, run)

                pgen = pgen * pDoubles * (1.0_dp - pParallel)
            end if
        else
            ! otherwise to a triple..
            call gen_triple_hubbard(nI, ilutI, nJ, ilutJ, temp_ex, tParity, pgen)
            ic = 3
            pgen = pgen * (1.0_dp - pDoubles)

        end if

    end subroutine gen_excit_k_space_hub_transcorr_test

    subroutine gen_triple_hubbard(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
        ! i think i should calculat the matrix element in here already!
        ! in this case.. otherwise i have to carry the tParity and ex
        ! all the way through the rest of the code and this makes problems
        ! i guess, since triples are actually never considered .. todo
        integer, intent(in) :: nI(nel)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: nJ(nel), ex(2, 3)
        integer(n_int), intent(out) :: ilutJ(0:niftot)
        logical, intent(out) :: tParity
        real(dp), intent(out) :: pgen
        integer :: elecs(3), orbs(3), src(3), sum_ms
        real(dp) :: p_elec, p_orb(2)

        ! first we pick 3 electrons in this case ofc.
        ! with the restriction, that they must not be all the same spin!
        call pick_three_opp_elecs(nI, elecs, p_elec, sum_ms)

        src = nI(elecs)
        ! then i pick 1 orbital? maybe..
        ! if this orbital is of the minority spin type, the other 2 orbitals
        ! MUST be of parallel spin..
        ! if the orbital is of the majority spin type the remaining
        ! orbitals must be of opposite spin type..
        ! can i take that into account with a tailored get_orb_from_kpoints()
        ! function for 3 electrons or should i decide here if we always
        ! want to do a specific picking order (restricting pgens, but making
        ! it easier to handle algorithmically) or if we want full flexibility
        ! (increasing pgens, but kind of making it a bit more difficult..)
        ! NO: we decide to always pick the minority spin first!
        call pick_a_orbital_hubbard(ilutI, orbs(1), p_orb(1), sum_ms)

        ! and pick the remaining two orbitals (essentially it is only
        ! one, since the last is restricted due to momentum conservation!)
        call pick_bc_orbitals_hubbard(nI, ilutI, src, orbs(1), orbs(2:3), p_orb(2))

        if (orbs(2) == 0) then
            nJ(1) = ABORT_EXCITATION
            pgen = 0.0_dp
            return
        end if

        ! so now.. did Robert make a routine like:
        call make_triple(nI, nJ, elecs, orbs, ex, tParity)

        ilutJ = make_ilutJ(ilutI, ex, 3)

        ! i am not sure about the factor of 2 here.. maybe this is already
        ! taken into account in the orbital creation
        pgen = p_elec * product(p_orb)

    end subroutine gen_triple_hubbard

    subroutine pick_bc_orbitals_hubbard(nI, ilutI, src, orb_a, orbs, p_orb)
        ! this is the main routine, which picks the final (b,c) orbital for
        ! the 3-body excitation
        integer, intent(in) :: nI(nel), src(3), orb_a
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: orbs(2)
        real(dp), intent(out) :: p_orb
#ifdef DEBUG_
        character(*), parameter :: this_routine = "pick_bc_orbitals_hubbard"
        real(dp) :: test
#endif
        real(dp) :: cum_arr(nbasis / 2), cum_sum
        integer :: orb_list(nbasis / 2, 2), ind

        ! do it similar to the other routines..
        call create_bc_list_hubbard(nI, ilutI, src, orb_a, orb_list, cum_arr, cum_sum)

        if (cum_sum < EPS) then
            orbs(1) = ABORT_EXCITATION
            return
        end if

        call pick_from_cum_list(cum_arr, cum_sum, ind, p_orb)

        orbs = orb_list(ind, :)

#ifdef DEBUG_
        ! the influence of orb_a is important in the pgen recalc!!
        call create_bc_list_hubbard(nI, ilutI, src, orb_a, orb_list, cum_arr, cum_sum, &
                                    orbs(2), test)

        if (abs(test - p_orb) > 1.e-8) then
            print *, "pgen assumption wrong: in ", this_routine
            print *, "p_orb: ", p_orb
            print *, "test: ", test
            print *, "ijk: ", src
            print *, "a: ", orb_a
            print *, "orbs: ", orbs
            print *, "cum_arr: ", cum_arr
            print *, "orb_list(:,1): ", orb_list(:, 1)
            print *, "orb_list(:,2): ", orb_list(:, 2)
        end if
#endif

        p_orb = 2.0_dp * p_orb

    end subroutine pick_bc_orbitals_hubbard

    subroutine create_bc_list_hubbard(nI, ilutI, src, orb_a, orb_list, cum_arr, &
                                      cum_sum, tgt, cpt)
        integer, intent(in) :: nI(nel), src(3), orb_a
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: orb_list(nbasis / 2, 2)
        real(dp), intent(out) :: cum_arr(nbasis / 2), cum_sum
        integer, intent(in), optional :: tgt
        real(dp), intent(out), optional :: cpt
#ifdef DEBUG_
        character(*), parameter :: this_routine = "create_bc_list_hubbard"
#endif
        integer :: b, c, ex(2, 3), spin, orb_b
        real(dp) :: elem
        integer :: nJ(nel)
        integer, allocatable :: ex2(:, :)

        orb_list = -1
        cum_arr = 0.0_dp
        cum_sum = 0.0_dp
        ex(1, :) = src
        ex(2, 1) = orb_a

        ! we want to do a restriction! to make it easier to recalculate the
        ! pgens and stuff!
        ! the restriction is, that the first picked orbital (a) is of the
        ! minority spin of the picked electrons.
        ! so if we have to alpha and 1 beta electron picked, orbital (a) is
        ! a beta orbital and the last 2 orbitals will be alpha and v.v.

        ! decide that the  first orbital orb_a, which is already picked is the
        ! minority spin electron, so now pick two electrons of the opposite
        ! spin
        if (is_beta(orb_a)) then
            ! then we want alpha orbitals
            spin = 0
            ! and also be sure that we did the right thing until now,
            ! otherwise it breaks
            ASSERT(sum(get_spin_pn(src)) == 1)
        else
            ! otherwise we want beta now
            spin = 1
            ASSERT(sum(get_spin_pn(src)) == -1)
        end if

        if (present(tgt)) then
            ASSERT(present(cpt))

            cpt = 0.0_dp

            ! does the spin of tgt fit?
            ! it must be opposite of orb_a!
            if (same_spin(orb_a, tgt)) return

            !TODO: we only need to consider one spin-type!!
            do b = 1, nbasis / 2
                elem = 0.0_dp

                ! convert to the appropriate spin-orbitals
                orb_b = 2 * b - spin

                if (IsNotOcc(ilutI, orb_b)) then
                    ! get the appropriate momentum conserverd orbital c
                    c = get_orb_from_kpoints_three(src, orb_a, orb_b)

                    if (c /= orb_b .and. IsNotOcc(ilutI, c)) then

                        ex(2, 2:3) = [orb_b, c]

                        ! actually i messed up with the non-hermiticity
                        ! i should actually switch the order of the
                        ! determinants in matrix element calculation
                        call swap_excitations(nI, ex, nJ, ex2)
                        elem = abs(get_3_body_helement_ks_hub(ex2, .false.))

                    end if
                end if
                cum_sum = cum_sum + elem
                if (tgt == orb_b) then
                    cpt = elem
                end if
            end do

            if (cum_sum < EPS) then
                cpt = 0.0_dp
            else
                cpt = cpt / cum_sum
            end if
        else
            do b = 1, nbasis / 2
                orb_b = 2 * b - spin

                elem = 0.0_dp
                c = 0

                if (IsNotOcc(ilutI, orb_b)) then
                    c = get_orb_from_kpoints_three(src, orb_a, orb_b)

                    if (c /= orb_b .and. IsNotOcc(ilutI, c)) then

                        ex(2, 2:3) = [orb_b, c]
                        call swap_excitations(nI, ex, nJ, ex2)
                        elem = abs(get_3_body_helement_ks_hub(ex2, .false.))
                    end if
                end if
                cum_sum = cum_sum + elem
                cum_arr(b) = cum_sum
                orb_list(b, :) = [orb_b, c]
            end do
        end if

    end subroutine create_bc_list_hubbard

    subroutine pick_a_orbital_hubbard(ilutI, orb, p_orb, sum_ms)
        ! hm... i think the first orbital can be picked totally random out
        ! of the empty orbitals or? since every spin and every momentum
        ! is allowed.. and i do not want to overdo the weighting i guess,
        ! especially not for the beginning
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: orb
        real(dp), intent(out) :: p_orb
        integer, intent(in), optional :: sum_ms
        integer :: spin

        ! if sum_ms is present, we pick the first orbital from the minority
        ! spins in the picked electrons -> so it is the opposite
        if (present(sum_ms)) then
            if (sum_ms == -1) then
                ! there a are 2 beta and one alpha electron picked ->
                ! so pick alpha here!
                spin = 0
                p_orb = 1.0_dp / real(nbasis / 2 - nOccAlpha, dp)
            else if (sum_ms == 1) then
                spin = -1
                p_orb = 1.0_dp / real(nbasis / 2 - nOccBeta, dp)
            end if

            do
                orb = 2 * (1 + int(genrand_real2_dsfmt() * nbasis / 2)) + spin

                if (IsNotOcc(ilutI, orb)) exit

            end do
        else

            do
                orb = 1 + int(genrand_real2_dsfmt() * nbasis)

                if (IsNotOcc(ilutI, orb)) exit

            end do

            p_orb = 1.0_dp / real(nbasis - nel, dp)
        end if

    end subroutine pick_a_orbital_hubbard

    subroutine gen_parallel_double_hubbard(nI, ilutI, nJ, ilutJ, ex, tParity, pgen)
        integer, intent(in) :: nI(nel)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: nJ(nel), ex(2, 2)
        integer(n_int), intent(out) :: ilutJ(0:niftot)
        logical, intent(out) :: tParity
        real(dp), intent(out) :: pgen
        real(dp) :: p_elec, p_orb
        integer :: elecs(2), orbs(2), src(2)

        ! in the transcorrelated case we have to decide
        ! i first have to choose an electron pair (ij) at random
        ! but with the condition that they have to have opposite spin!
        ! this is the only difference: i pick two spin-parallel electrons..
        ! the rest stays the same.. i just have to adjust the
        ! get_orb_from_kpoints routine
        ! and the matrix element calculation
        call pick_spin_par_elecs(nI, elecs, p_elec)

        src = nI(elecs)

        ! i realised i could reuse the already implemented orbital picker,
        ! but the other one does not use the fact that we know that we
        ! have parallel spin in this case! so implement a parallel
        ! spin-orbital picker!! (also better for matrix elements.. so we
        ! must not check if the spins fit, if we only take the correct ones!)
        call pick_ab_orbitals_par_hubbard(nI, ilutI, src, orbs, p_orb)

        if (orbs(1) == ABORT_EXCITATION) then
            nJ(1) = ABORT_EXCITATION
            pgen = 0.0_dp
            return
        end if

        ! and make the excitation
        call make_double(nI, nJ, elecs(1), elecs(2), orbs(1), orbs(2), ex, tParity)

        ilutJ = make_ilutJ(ilutI, ex, 2)

        ! i think in both the electrons and the orbitals i have twice the
        ! probability to pick them
        ! already modified in the orbital picker!
        ! i am not super sure about a factor of 2 here..
        pgen = p_elec * p_orb

    end subroutine gen_parallel_double_hubbard

    subroutine pick_ab_orbitals_par_hubbard(nI, ilutI, src, orbs, p_orb)
        integer, intent(in) :: nI(nel), src(2)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: orbs(2)
        real(dp), intent(out) :: p_orb
#ifdef DEBUG_
        character(*), parameter :: this_routine = "pick_ab_orbitals_par_hubbard"
        real(dp) :: test, test_arr(nBasis / 2)
        integer :: ex(2, 2)
#endif
        real(dp) :: cum_arr(nbasis / 2)
        real(dp) :: cum_sum
        integer :: orb_list(nbasis / 2, 2)
        integer :: ind

        ! without transcorrelation factor this is uniform, but with a
        ! transcorrelation factor the matrix element might change and so also
        ! the pgen should change.

        call create_ab_list_par_hubbard(nI, ilutI, src, orb_list, cum_arr, cum_sum)

        if (cum_sum < EPS) then
            p_orb = 0.0_dp
            orbs(1) = ABORT_EXCITATION
            return
        end if

        ! this stuff is also written so often i should finally make a routine
        ! out of that
        call pick_from_cum_list(cum_arr, cum_sum, ind, p_orb)

        orbs = orb_list(ind, :)

#ifdef DEBUG_
        ! check that the other way of picking the orbital has the same
        ! probability..
        call create_ab_list_par_hubbard(nI, ilutI, src, orb_list, test_arr, cum_sum, &
                                        orbs(2), test)

        if (abs(test - p_orb) > 1.e-8) then
            print *, "pgen assumption wrong in ", this_routine
            print *, "nI: ", nI
            print *, "p_orb: ", p_orb
            print *, "test: ", test
            print *, "ij: ", src
            print *, "orbs: ", orbs
            print *, "cum_arr: ", cum_arr
            print *, "test_arr: ", test_arr
        end if

        !todo: also call the calc_pgen_k_space_hubbard here and check
        ! pgens
        ex(1, :) = src
        ex(2, :) = orbs

#endif

        ! do i have to recalc. the pgen the other way around? yes!
        ! effectively reuse the above functionality
        ! i am pretty sure i just have to find the position in the
        ! list.. OR: since in the hubbard it is just twice the
        ! probability or? i am pretty sure yes.. but for all of them..
        ! so in the end it shouldnt matter again..
        p_orb = 2.0_dp * p_orb

    end subroutine pick_ab_orbitals_par_hubbard

    subroutine pick_ab_orbitals_hubbard(nI, ilutI, src, orbs, p_orb)
        ! depending on the already picked electrons (ij) pick an orbital
        ! (a) and the connected orbital (b)
        integer, intent(in) :: nI(nel), src(2)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: orbs(2)
        real(dp), intent(out) :: p_orb
#ifdef DEBUG_
        real(dp) :: test
        integer :: ex(2, 2)
#endif
        real(dp) :: cum_arr(nbasis)
        real(dp) :: cum_sum
        integer :: orb_list(nbasis, 2)
        integer :: ind

        ! without transcorrelation factor this is uniform, but with a
        ! transcorrelation factor the matrix element might change and so also
        ! the pgen should change.
        call create_ab_list_hubbard(nI, ilutI, src, orb_list, cum_arr, cum_sum)

        if (cum_sum < EPS) then
            orbs(1) = ABORT_EXCITATION
            return
        end if

        ! this stuff is also written so often i should finally make a routine
        ! out of that
        call pick_from_cum_list(cum_arr, cum_sum, ind, p_orb)

        orbs = orb_list(ind, :)

#ifdef DEBUG_
        ! check that the other way of picking the orbital has the same
        ! probability..
        call create_ab_list_hubbard(nI, ilutI, src, orb_list, cum_arr, cum_sum, &
                                    orbs(2), test)

        if (abs(test - p_orb) > 1.e-8) then
            print *, "pgen assumption wrong:!"
            print *, "p_orb: ", p_orb
            print *, "test: ", test
            print *, "orbs: ", orbs
        end if

        !todo: also call the calc_pgen_k_space_hubbard here and check
        ! pgens
        ex(1, :) = src
        ex(2, :) = orbs

#endif

        ! do i have to recalc. the pgen the other way around? yes!
        ! effectively reuse the above functionality
        ! i am pretty sure i just have to find the position in the
        ! list.. OR: since in the hubbard it is just twice the
        ! probability or? i am pretty sure yes.. but for all of them..
        ! so in the end it shouldnt matter again..
        p_orb = 2.0_dp * p_orb

    end subroutine pick_ab_orbitals_hubbard

    subroutine create_ab_list_par_hubbard(nI, ilutI, src, orb_list, cum_arr, cum_sum, &
                                          tgt, cpt)
        integer, intent(in) :: nI(nel), src(2)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: orb_list(nbasis / 2, 2)
        real(dp), intent(out) :: cum_arr(nbasis / 2)
        real(dp), intent(out) :: cum_sum
        integer, intent(in), optional :: tgt
        real(dp), intent(out), optional :: cpt
#ifdef DEBUG_
        character(*), parameter :: this_routine = "create_ab_list_par_hubbard"
#endif
        integer :: a, b, ex(2, 2), spin, orb_a
        real(dp) :: elem
        integer :: nJ(nel)
        integer, allocatable :: ex2(:, :)
        ! do the cum_arr for the k-space hubbard
        ! i think here i might really use flags.. and not just do the
        ! influence over the matrix elements.. since without transcorrelation
        ! i would waste alot of effort if i calculate the matrix elements
        ! here all the time..
        orb_list = -1
        cum_arr = 0.0_dp
        cum_sum = 0.0_dp

        ex(1, :) = src

        ! this routine only checks for parallel spins, depending on src
        ASSERT(same_spin(src(1), src(2)))

        ! make a spin factor for the orbital conversion
        ! 0...alpha
        ! 1...beta
        spin = get_spin(src(1)) - 1

        ! and only loop over the correct spin
        if (present(tgt)) then
            ASSERT(present(cpt))

            cpt = 0.0_dp

            ! if target does not have the same spin, do we abort or return?
            if (.not. same_spin(src(1), tgt)) return

            do a = 1, nbasis / 2
                elem = 0.0_dp

                orb_a = 2 * a - spin

                if (IsNotOcc(ilutI, orb_a)) then
                    ! modify get_orb_from_kpoints to give spin-parallel
                    ! it already does i think!
                    b = get_orb_from_kpoints(src(1), src(2), orb_a)

                    if (b /= orb_a .and. IsNotOcc(ilutI, b)) then

                        ex(2, :) = [orb_a, b]

                        call swap_excitations(nI, ex, nJ, ex2)
                        elem = abs(get_offdiag_helement_k_sp_hub(nJ, ex2, .false.))

                    end if
                end if
                cum_sum = cum_sum + elem
                if (tgt == orb_a) then
                    cpt = elem
                end if
            end do
            if (cum_sum < EPS) then
                cpt = 0.0_dp
            else
                cpt = cpt / cum_sum
            end if
        else
            do a = 1, nbasis / 2
                orb_a = 2 * a - spin

                elem = 0.0_dp
                b = 0

                if (IsNotOcc(ilutI, orb_a)) then
                    b = get_orb_from_kpoints(src(1), src(2), orb_a)

                    if (b /= orb_a .and. IsNotOcc(ilutI, b)) then
                        ex(2, :) = [orb_a, b]
                        call swap_excitations(nI, ex, nJ, ex2)
                        elem = abs(get_offdiag_helement_k_sp_hub(nJ, ex2, .false.))
                    end if
                end if
                cum_sum = cum_sum + elem
                cum_arr(a) = cum_sum
                orb_list(a, :) = [orb_a, b]
            end do
        end if

    end subroutine create_ab_list_par_hubbard

    subroutine create_ab_list_hubbard(nI, ilutI, src, orb_list, cum_arr, cum_sum, &
                                      tgt, cpt)
        integer, intent(in) :: nI(nel), src(2)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(out) :: orb_list(nbasis, 2)
        real(dp), intent(out) :: cum_arr(nbasis)
        real(dp), intent(out) :: cum_sum
        integer, intent(in), optional :: tgt
        real(dp), intent(out), optional :: cpt
#ifdef DEBUG_
        character(*), parameter :: this_routine = "create_ab_list_hubbard"
#endif
        integer :: a, b, ex(2, 2)
        real(dp) :: elem
        integer :: nJ(nel)
        integer, allocatable :: ex2(:, :)

        ! do the cum_arr for the k-space hubbard
        ! i think here i might really use flags.. and not just do the
        ! influence over the matrix elements.. since without transcorrelation
        ! i would waste alot of effort if i calculate the matrix elements
        ! here all the time..
        orb_list = -1
        cum_arr = 0.0_dp
        cum_sum = 0.0_dp

        ex(1, :) = src

        ! we are also using this routine for the parallel excitations due to
        ! the transcorrelation factor.. this is nice, since it is easily
        ! reusable, but, loses alot of efficiency, since the we are looping
        ! over all spin-orbital, although we know we only want to loop over a
        ! certain spin!
        ! todo
        if (present(tgt)) then
            ASSERT(present(cpt))

            cpt = 0.0_dp

            ! OPTIMIZATION: Do not loop over nbasis here, but over a pre-computed
            ! lookup table of excitations for src (if possible, is not an option
            ! if the matrix element depends on nI)
            do a = 1, nbasis
                elem = 0.0_dp
                ! if a is empty
                if (IsNotOcc(ilutI, a)) then
                    ! i have to rewrite get_orb, so it gives me the same
                    ! spin if src has the same spin! todo
                    ! to take into account spin-parallel double
                    ! excitations!

                    ! get the excitation
                    b = get_orb_from_kpoints(src(1), src(2), a)
                    ! we have yet to check if b is unoccupied
                    if (b /= a .and. IsNotOcc(ilutI, b)) then
                        ! assert that we hit opposite spins
                        if (.not. t_trans_corr_2body) then
                            ASSERT(.not. same_spin(a, b))
                        end if
                        ! get the matrix element (from storage)
                        if (.not. (t_trans_corr .or. t_trans_corr_2body)) then
                            elem = excit_cache(src(1), src(2), a)
                        else
                            ex(2, :) = [a, b]
                            call swap_excitations(nI, ex, nJ, ex2)
                            elem = abs(get_offdiag_helement_k_sp_hub(nJ, ex2, .false.))
                        end if
                    end if
                end if
                cum_sum = cum_sum + elem

                if (tgt == a) then
                    cpt = elem
                end if
            end do
            if (cum_sum < EPS) then
                cpt = 0.0_dp
            else
                ! todo: maybe i have to multiply by 2 here.. since both
                ! direction possible..
                cpt = cpt / cum_sum
            end if
        else
            do a = 1, nbasis
                elem = 0.0_dp
                b = -1

                if (IsNotOcc(ilutI, a)) then
                    b = get_orb_from_kpoints(src(1), src(2), a)

                    if (b /= a .and. IsNotOcc(ilutI, b)) then
                        ! get the matrix element (from storage)
                        if (.not. (t_trans_corr .or. t_trans_corr_2body)) then
                            elem = excit_cache(src(1), src(2), a)
                        else
                            ex(2, :) = [a, b]
                            call swap_excitations(nI, ex, nJ, ex2)
                            elem = abs(get_offdiag_helement_k_sp_hub(nJ, ex2, .false.))
                        end if
                    end if
                end if
                cum_sum = cum_sum + elem
                cum_arr(a) = cum_sum
                orb_list(a, :) = [a, b]
            end do
        end if

    end subroutine create_ab_list_hubbard

    function calc_pgen_k_space_hubbard_uniform_transcorr(ex, ic) result(pgen)
        ! need a calc pgen functionality for the uniform transcorrelated
        ! excitation generator
        integer, intent(in) :: ex(:, :), ic
        real(dp) :: pgen
#ifdef DEBUG_
        character(*), parameter :: this_routine = "calc_pgen_k_space_hubbard_uniform_transcorr"
#endif
        real(dp) :: p_elec, p_orb
        integer :: sum_ms

        pgen = 0.0_dp

        if (ic == 2) then

            pgen = pDoubles

            if (same_spin(ex(1, 1), ex(1, 2))) then
                pgen = pgen * pParallel

                if (is_beta(ex(1, 1))) then
                    p_elec = 1.0_dp / real(nOccBeta * (nOccBeta - 1), dp)
                    p_orb = 2.0_dp / real(nbasis / 2 - nOccBeta, dp)
                else
                    p_elec = 1.0_dp / real(nOccAlpha * (nOccAlpha - 1), dp)
                    p_orb = 2.0_dp / real(nbasis / 2 - nOccAlpha, dp)
                end if

            else
                pgen = pgen * (1.0_dp - pParallel)
                p_elec = 1.0_dp / real(nOccBeta * nOccAlpha, dp)

                p_orb = 2.0_dp / real(nbasis - nel, dp)

            end if

        else
            pgen = 1.0_dp - pDoubles

            sum_ms = sum(get_spin_pn(ex(1, :)))

            ASSERT(sum_ms == 1 .or. sum_ms == -1)

            if (sum_ms == 1) then
                p_elec = 2.0_dp / real(nel * (nel - 1), dp) * &
                         (1.0_dp / real(nOccBeta, dp) + 2.0_dp / real(nel - 2, dp))

                p_orb = 1.0_dp / real(nbasis / 2 - nOccBeta, dp) * &
                        2.0_dp / real(nbasis / 2 - nOccAlpha, dp)

            else if (sum_ms == -1) then
                p_elec = 2.0_dp / real(nel * (nel - 1), dp) * &
                         (1.0_dp / real(nOccAlpha, dp) + 2.0_dp / real(nel - 2, dp))

                p_orb = 1.0_dp / real(nbasis / 2 - nOccAlpha, dp) * &
                        2.0_dp / real(nBasis / 2 - nOccBeta, dp)

            end if
        end if

        pgen = pgen * p_elec * p_orb

    end function calc_pgen_k_space_hubbard_uniform_transcorr

    function calc_pgen_k_space_hubbard_transcorr(nI, ilutI, ex, ic) result(pgen)
        ! this function i have to rewrite for the transcorrelated to take
        ! the same-spin doubles and triples into account!
        ! NOTE: ex could be of form (2,3) in the case of triples!
        integer, intent(in) :: nI(nel), ex(:, :), ic
        integer(n_int), intent(in) :: ilutI(0:niftot)
        real(dp) :: pgen

        pgen = 0.0_dp

        if (ic == 2) then
            if (same_spin(ex(1, 1), ex(1, 2))) then
                ! parallel double excitation
                ! the spins are checked within the function:
                pgen = calc_pgen_k_space_hubbard_par(nI, ilutI, ex, ic)

                pgen = pgen * pDoubles * pParallel

            else
                ! "normal" opposite spin excitation
                ! the spins are checked within the function:
                pgen = calc_pgen_k_space_hubbard(nI, ilutI, ex, ic)

                pgen = pgen * pDoubles * (1.0_dp - pParallel)
            end if
        else if (ic == 3) then
            pgen = calc_pgen_k_space_hubbard_triples(nI, ilutI, ex, ic)

            pgen = pgen * (1.0_dp - pDoubles)

        end if

    end function calc_pgen_k_space_hubbard_transcorr

    function calc_pgen_k_space_hubbard_triples(nI, ilutI, ex, ic) result(pgen)
        integer, intent(in) :: nI(nel), ex(:, :), ic
        integer(n_int), intent(in) :: ilutI(0:niftot)
        real(dp) :: pgen
#ifdef DEBUG_
        real(dp) :: test
#endif
        real(dp) :: p_elec, p_orb(2), cum_arr(nbasis / 2), cum_sum
        integer :: orb_list(nbasis / 2, 2), sum_ms, orb_a, orbs(2)

        if (ic /= 3) then
            pgen = 0.0_dp
            return
        end if

        sum_ms = sum(get_spin_pn(ex(1, :)))

        ! check spins
        if (.not. (sum_ms == 1 .or. sum_ms == -1) .or. sum_ms /= sum(get_spin_pn(ex(2, :)))) then
            pgen = 0.0_dp
            return
        end if

        ! get the probabilites for the electrons and orbital (a)
        if (sum_ms == 1) then
            p_elec = 1.0_dp / real(nel * (nel - 1), dp) * &
                     (1.0_dp / real(nOccBeta, dp) + 2.0_dp / real(nel - 2, dp))

            p_orb(1) = 1.0_dp / real(nbasis / 2 - nOccBeta, dp)

        else
            p_elec = 1.0_dp / real(nel * (nel - 1), dp) * &
                     (1.0_dp / real(nOccAlpha, dp) + 2.0_dp / real(nel - 2, dp))

            p_orb(1) = 1.0_dp / real(nbasis / 2 - nOccAlpha, dp)

        end if

        ! for this i need the minority spin orbital (a)
        orb_a = find_minority_spin(ex(2, :))

        orbs = pack(ex(2, :), ex(2, :) /= orb_a)

        call create_bc_list_hubbard(nI, ilutI, ex(1, :), orb_a, orb_list, cum_arr, &
                                    cum_sum, orbs(1), p_orb(2))

        pgen = p_elec * product(p_orb) * 2.0_dp

#ifdef DEBUG_
        call create_bc_list_hubbard(nI, ilutI, ex(1, :), orb_a, orb_list, cum_arr, &
                                    cum_sum, orbs(2), test)

        if (abs(test - p_orb(2)) > 1.e-8) then
            print *, "pgen assumption wrong:!"
            print *, "p_orb: ", p_orb(2)
            print *, "test: ", test
            print *, "ex(2,:): ", ex(2, :)
        end if
#endif

    end function calc_pgen_k_space_hubbard_triples

    function calc_pgen_k_space_hubbard_par(nI, ilutI, ex, ic) result(pgen)
        integer, intent(in) :: nI(nel), ex(:, :), ic
        integer(n_int), intent(in) :: ilutI(0:niftot)
        real(dp) :: pgen
#ifdef DEBUG_
        real(dp) :: test
#endif
        real(dp) :: p_elec, p_orb, cum_arr(nbasis / 2), cum_sum
        integer :: orb_list(nbasis / 2, 2)

        ! check ic:
        if (ic /= 2) then
            pgen = 0.0_dp
            return
        end if

        ! check spin:
        if (.not. (same_spin(ex(1, 1), ex(1, 2)) .and. same_spin(ex(2, 1), ex(2, 2)) .and. &
                   same_spin(ex(1, 1), ex(2, 1)))) then
            pgen = 0.0_dp
            return
        end if

        if (get_ispn(ex(1, 1:2)) == 1) then
            p_elec = 1.0_dp / real(nbasis / 2 - nOccBeta, dp)
        else
            p_elec = 1.0_dp / real(nbasis / 2 - nOccAlpha, dp)
        end if

        call create_ab_list_par_hubbard(nI, ilutI, ex(1, 1:2), orb_list, cum_arr, &
                                        cum_sum, ex(2, 1), p_orb)

        pgen = p_elec * p_orb * 2.0_dp

#ifdef DEBUG_
        call create_ab_list_par_hubbard(nI, ilutI, ex(1, 1:2), orb_list, cum_arr, &
                                        cum_sum, ex(2, 1), test)

        if (abs(test - p_orb) > 1.e-8) then
            print *, "pgen assumption wrong:!"
            print *, "p_orb: ", p_orb
            print *, "test: ", test
            print *, "ex(2,:): ", ex(2, :)
        end if

#endif

    end function calc_pgen_k_space_hubbard_par

    function calc_pgen_k_space_hubbard(nI, ilutI, ex, ic) result(pgen)
        integer(n_int), intent(in) :: ilutI(0:niftot)
        integer, intent(in) :: nI(nel), ex(2, 2), ic
        real(dp) :: pgen
#ifdef DEBUG_
        real(dp) :: test
#endif
        real(dp) :: p_elec, p_orb, cum_arr(nbasis), cum_sum
        integer :: orb_list(nbasis, 2), src(2)

        if (ic /= 2) then
            pgen = 0.0_dp
            return
        end if

        if (same_spin(ex(1, 1), ex(1, 2)) .or. same_spin(ex(2, 1), ex(2, 2))) then
            pgen = 0.0_dp
            return
        end if

        p_elec = 1.0_dp / real(nOccBeta * nOccAlpha, dp)

        src = get_src(ex)

        call create_ab_list_hubbard(nI, ilutI, src, orb_list, cum_arr, cum_sum, &
                                    ex(2, 1), p_orb)

#ifdef DEBUG_
        call create_ab_list_hubbard(nI, ilutI, src, orb_list, cum_arr, cum_sum, &
                                    ex(2, 2), test)

        if (abs(test - p_orb) > 1.e-8) then
            print *, "pgen assumption wrong:!"
            print *, "p_orb: ", p_orb
            print *, "test: ", test
            print *, "ex(2,:): ", ex(2, :)
        end if

#endif

        ! i do not need to recalc, the p(b|ij) since it is the same..
        ! but i need a factor of 2 somewhere.. figure that out!
        pgen = p_elec * p_orb * 2.0_dp

    end function calc_pgen_k_space_hubbard

    subroutine init_get_helement_k_space_hub

        if (iProcIndex == root) then
            print *, "initialize k-space get_helement pointer"
        end if

        if (t_trans_corr_2body) then
            three_body_prefac = real(bhub, dp) * 2.0_dp * (cosh(trans_corr_param_2body) - 1.0_dp) / real(omega**2, dp)
        end if

        call init_dispersion_rel_cache()
        call init_tmat_kspace(lat)
        call init_two_body_trancorr_fac_matrix()
        n_opp(-1) = real(nel / 2 + lms, dp)
        n_opp(1) = real(nel / 2 - lms, dp)
        call init_three_body_const_mat()

        get_umat_el => get_umat_kspace
        ! i guess i should also set the transcorr factor here or??
        get_helement_lattice_ex_mat => get_helement_k_space_hub_ex_mat
        get_helement_lattice_general => get_helement_k_space_hub_general
        ! maybe i have to initialize more here, especially if we are using the
        ! HPHF keyword I guess..

        call initSltCndPtr()

    end subroutine init_get_helement_k_space_hub

    function get_helement_k_space_hub_ex_mat(nI, ic, ex, tpar) result(hel)
        integer, intent(in) :: nI(nel), ic, ex(2, ic)
        logical, intent(in) :: tpar
        HElement_t(dp) :: hel

        !todo: if 2-body-transcorrelation, we can have triple excitations now..
        ! fix that here.. (and also in a lot of other parts in the code..)

        if (ic == 0) then
            ! the diagonal is just the sum of the occupied one-particle
            ! basis states
            hel = get_diag_helement_k_sp_hub(nI)

        else if (ic == 2) then

            hel = get_offdiag_helement_k_sp_hub(nI, ex, tpar)

        else if (ic == 3 .and. t_trans_corr_2body) then

            hel = get_3_body_helement_ks_hub(ex, tpar)

        else

            hel = h_cast(0.0_dp)

        end if

    end function get_helement_k_space_hub_ex_mat

    function get_helement_k_space_hub_general(nI, nJ, ic_ret) result(hel)
        integer, intent(in) :: nI(nel), nJ(nel)
        integer, intent(inout), optional :: ic_ret
        HElement_t(dp) :: hel
        integer :: ic, ex(2, 3), ex_2(2, 2)
        logical :: tpar
        integer(n_int) :: ilutI(0:NIfTot), ilutJ(0:niftot)

        !todo: if 2-body-transcorrelation, we can have triple excitations now..
        ! fix that here.. (and also in a lot of other parts in the code..)
        if (present(ic_ret)) then
            if (ic_ret == 0) then
                hel = get_diag_helement_k_sp_hub(nI)

            else if (ic_ret == 2) then
                ex_2(1, 1) = 2

                call GetExcitation(nI, nJ, nel, ex_2, tpar)
                hel = get_offdiag_helement_k_sp_hub(nI, ex_2, tpar)

            else if (ic_ret == 3 .and. t_trans_corr_2body) then
                ex(1, 1) = 3
                call GetExcitation(nI, nJ, nel, ex, tpar)
                hel = get_3_body_helement_ks_hub(ex, tpar)

            else if (ic_ret == -1) then
                call EncodeBitDet(nI, ilutI)
                call EncodeBitDet(nJ, ilutJ)

                ic_ret = FindBitExcitLevel(ilutI, ilutJ)

                if (ic_ret == 0) then
                    hel = get_diag_helement_k_sp_hub(nI)

                else if (ic_ret == 2) then
                    ex_2(1, 1) = 2
                    call GetBitExcitation(ilutI, ilutJ, ex_2, tpar)
                    hel = get_offdiag_helement_k_sp_hub(nI, ex_2, tpar)

                else if (ic_ret == 3 .and. t_trans_corr_2body) then
                    ex(1, 1) = 3
                    call GetBitExcitation(ilutI, ilutJ, ex, tpar)

                    hel = get_3_body_helement_ks_hub(ex, tpar)

                else
                    hel = h_cast(0.0_dp)
                end if
            else
                hel = h_cast(0.0_dp)
            end if
        else
            call EncodeBitDet(nI, ilutI)
            call EncodeBitDet(nJ, ilutJ)

            ic = FindBitExcitLevel(ilutI, ilutJ)

            if (ic == 0) then
                hel = get_diag_helement_k_sp_hub(nI)
            else if (ic == 2) then
                ex_2(1, 1) = 2

                call GetBitExcitation(ilutI, ilutJ, ex_2, tpar)
                hel = get_offdiag_helement_k_sp_hub(nI, ex_2, tpar)

            else if (ic == 3 .and. t_trans_corr_2body) then
                ex(1, 1) = 3
                call GetBitExcitation(ilutI, ilutJ, ex, tpar)

                hel = get_3_body_helement_ks_hub(ex, tpar)

            else
                hel = h_cast(0.0_dp)
            end if
        end if

    end function get_helement_k_space_hub_general

    ! i have not switched to this diag routine yet?!
    function get_diag_helement_k_sp_hub(nI) result(hel)
        integer, intent(in) :: nI(nel)
        HElement_t(dp) :: hel
        integer :: i, j, id(nel), idX, idN, k
        HElement_t(dp) :: hel_sing, hel_doub, hel_one, hel_three
        HElement_t(dp) :: temp_hel
        type(symmetry) :: p_sym, k_sym

        ! todo: in the case of 2-body-transcorrelation, there are more
        ! contributions..
        hel = h_cast(0.0_dp)

        if (t_trans_corr_2body) then
            ! this is the
            hel_sing = sum(GetTMatEl(nI, nI))

            id = get_spatial(nI)

            hel_doub = h_cast(0.0_dp)
            hel_one = h_cast(0.0_dp)
            hel_three = h_cast(0.0_dp)

            temp_hel = 0.0_dp

            ! redo this whole shabang.. the formulas are actually much easier:
            ! but just to be sure for now, do i explicetly without any use of
            ! symmetry
            do i = 1, nel
                do j = 1, nel
                    ! the restriction is, that i and j must have opposite
                    ! spin! this also excludes i == j
                    if (.not. same_spin(nI(i), nI(j))) then

                        idX = max(id(i), id(j))
                        idN = min(id(i), id(j))

                        ! now we need 1/2, since we loop over all electrons
                        hel_doub = hel_doub + 0.5_dp * get_umat_kspace(idN, idX, idN, idX)

                        ! then we need the factor of the one-body transcorr influence
                        ! t is defined as -t in our code!, so bhub is usually -1
                        ! and look in the formulas it is actually -2t*cos(k)*2(cosh J - 1)
                        ! (with the k-vector of orbial i!
                        hel_one = hel_one + epsilon_kvec(G1(nI(i))%Sym) &
                                  * omega * three_body_prefac

                        ! and the next part is the three-body with the direct
                        ! and the exchange parts
                        do k = 1, nel
                            ! and the convention is that j and k have same spin!
                            ! and j == k is also allowed and part of it!
                            if (j == k) cycle
                            if (same_spin(ni(j), nI(k))) then
                                ! the k vector is of i and i + j - k
                                ! i need the electrons here ofc..
                                ! even better then the correct k-vector addition
                                ! would be to store an epsilon-k in terms of
                                ! the symmetry symbols!
                                ! something like this but nicer!
                                p_sym = G1(nI(i))%sym
                                k_sym = SymTable(G1(nI(j))%sym%s, SymConjTab(G1(nI(k))%sym%s))

                                hel_three = hel_three - three_body_prefac * ( &
                                            epsilon_kvec(p_sym) - &
                                            (epsilon_kvec(SymTable(p_sym%s, k_sym%s))))

                            end if
                        end do
                    end if
                end do
            end do

            hel = hel_sing + hel_doub + hel_one + hel_three

        else
            hel = sltcnd_excit(nI, Excite_0_t())
        end if

    end function get_diag_helement_k_sp_hub

    function get_2_body_diag_transcorr(nI) result(two_body)
        integer, intent(in) :: nI(nel)
        HElement_t(dp) :: two_body
        integer :: i, j, id(nel), idX, idN

        two_body = h_cast(0.0_dp)

        id = get_spatial(nI)

        do i = 1, nel
            do j = 1, nel
                if (.not. same_spin(nI(i), nI(j))) then

                    idX = max(id(i), id(j))
                    idN = min(id(i), id(j))

                    ! now we need 1/2, since we loop over all electrons
                    two_body = two_body + 0.5_dp * get_umat_kspace(idN, idX, idN, idX)

                    two_body = two_body + epsilon_kvec(G1(nI(i))%Sym) &
                              * omega * three_body_prefac

                end if
            end do
        end do

    end function get_2_body_diag_transcorr

    function get_3_body_diag_transcorr(nI) result(three_body)
        integer, intent(in) :: nI(nel)
        HElement_t(dp) :: three_body

        integer :: i, j, k
        type(symmetry) :: p_sym, k_sym

        three_body = h_cast(0.0_dp)

        do i = 1, nel
            do j = 1, nel
                if (.not. same_spin(nI(i), nI(j))) then

                    do k = 1, nel

                        if (j == k) cycle

                        if (same_spin(nI(j), nI(k))) then
                            p_sym = G1(nI(i))%sym
                            k_sym = SymTable(G1(nI(j))%sym%s, SymConjTab(G1(nI(k))%sym%s))

                            three_body = three_body - three_body_prefac * ( &
                                        epsilon_kvec(p_sym) - &
                                        (epsilon_kvec(SymTable(p_sym%s, k_sym%s))))

                        end if
                    end do
                end if
            end do
        end do

    end function get_3_body_diag_transcorr

#ifndef CMPLX_
    real(dp) function get_j_opt(nI, corr_J)
        ! routine to evaluate Hongjuns J-optimization formulas
        integer, intent(in) :: nI(nel)
        real(dp), intent(in) :: corr_J

        integer :: i, j, a, spin_p, spin_q, b
        integer(n_int) :: ilut(0:niftot)
        type(symmetry) :: p_sym, q_sym, a_sym, b_sym, k_sym

        integer :: src(2), tgt(2)
        real(dp) :: sgn
        real(dp) :: two, rpa, exchange, sum_3, sum_hel

        call EncodeBitDet(nI, ilut)

        get_j_opt = 0.0_dp

        two = 0.0_dp
        rpa = 0.0_dp
        exchange = 0.0_dp
        sum_3 = 0.0_dp
        sum_hel = 0.0_dp
        if (.not. t_symmetric) then
            do i = 1, nel! -1
                do j = 1, nel
                    ! i only have a contribution if the spins of nI and nJ
                    ! are not the same!
                    if (.not. same_spin(nI(i), nI(j))) then
                        ! and then I need to loop over the holes, but due to
                        ! momentum conservation, only once!
                        do a = 1, nBasis
                            ! if a is empty
                            if (IsNotOcc(ilut, a)) then
                                b = get_orb_from_kpoints(nI(i), nI(j), a)
                                if (IsNotOcc(ilut, b) .and. .not. same_spin(a, b)) then

                                    p_sym = G1(nI(i))%sym
                                    q_sym = G1(nI(j))%sym
                                    spin_p = get_spin_pn(nI(i))
                                    spin_q = get_spin_pn(nI(j))

                                    ! and now i have to think how to correly
                                    ! choose the momenta involved
                                    if (same_spin(nI(i), a)) then
                                        a_sym = G1(a)%sym
                                        b_sym = G1(b)%sym
                                    else
                                        a_sym = G1(b)%sym
                                        b_sym = G1(a)%sym
                                    end if
                                    k_sym = SymTable(p_sym%s, SymConjTab(a_sym%s))

                                    ! since i loop over all possible i,j i do not need
                                    ! the sum like below i think
                                    get_j_opt = get_j_opt + &
                                                two_body_contrib(corr_J, p_sym, a_sym) + &
                                                three_body_rpa_contrib(corr_J, p_sym, a_sym, spin_p) + &
                                                three_body_exchange_contrib(nI, corr_J, p_sym, q_sym, a_sym, spin_q)

                                    two = two + two_body_contrib(corr_J, p_sym, a_sym)
                                    rpa = rpa + three_body_rpa_contrib(corr_J, p_sym, a_sym, spin_p)
                                    exchange = exchange + three_body_exchange_contrib(nI, corr_J, p_sym, q_sym, a_sym, spin_p)

                                end if
                            end if
                        end do
                    end if
                end do
            end do
        else
            do i = 1, nel! - 1
                do j = 1, nel
                    ! i only have a contribution if the spins of nI and nJ
                    ! are not the same!
                    if (.not. same_spin(nI(i), nI(j))) then
                        ! and then I need to loop over the holes, but due to
                        ! momentum conservation, only once!
                        do a = 1, nBasis
                            ! if a is empty
                            if (IsNotOcc(ilut, a)) then
                                b = get_orb_from_kpoints(nI(i), nI(j), a)
                                if (IsNotOcc(ilut, b) .and. .not. same_spin(a, b)) then! .and. a < b) then

                                    src = [min(nI(i), nI(j)), max(nI(i), nI(j))]
                                    tgt = [min(a, b), max(a, b)]

                                    if (is_beta(src(1))) then
                                        p_sym = G1(src(1))%sym
                                        q_sym = G1(src(2))%sym
                                    else
                                        p_sym = G1(src(2))%sym
                                        q_sym = G1(src(1))%sym
                                    end if

                                    spin_p = get_spin_pn(src(1))
                                    spin_q = get_spin_pn(src(2))

                                    if (same_spin(src(1), tgt(1))) then
                                        a_sym = G1(tgt(1))%sym
                                        b_sym = G1(tgt(2))%sym
                                        sgn = 1.0_dp
                                    else
                                        a_sym = G1(tgt(2))%sym
                                        b_sym = G1(tgt(1))%sym
                                        sgn = -1.0_dp
                                    end if

                                    k_sym = SymTable(p_sym%s, SymConjTab(a_sym%s))

                                    ! since i loop over all possible i,j i do not need
                                    ! the sum like below i think

                                    get_j_opt = get_j_opt + ( &
                                                two_body_contrib(corr_J, p_sym, a_sym) + &
                                                two_body_contrib(corr_J, q_sym, b_sym) + &
                                                three_body_rpa_contrib(corr_J, p_sym, a_sym, spin_p) + &
                                                three_body_rpa_contrib(corr_J, q_sym, b_sym, spin_q) + &
                                                three_body_exchange_contrib(nI, corr_J, p_sym, q_sym, a_sym, spin_p) + &
                                                three_body_exchange_contrib(nI, corr_J, q_sym, p_sym, b_sym, spin_q))

                                end if
                            end if
                        end do
                    end if
                end do
            end do
        end if
    end function get_j_opt
#endif

    function get_one_body_diag_sym(nI, spin, k_sym, t_sign) result(hel)
        integer, intent(in) :: nI(nel)
        integer, intent(in) :: spin
        type(symmetry), intent(in) :: k_sym
        logical, intent(in), optional :: t_sign
        HElement_t(dp) :: hel

        integer :: i, sgn
#ifdef DEBUG_
        character(*), parameter :: this_routine = "get_one_body_diag_sym"
#endif
        ! change this routine to also use just the symmetry symbols
        type(symmetry) :: sym
        logical :: t_sign_
        def_default(t_sign_, t_sign, .false.)

        ! the spin input: -1 is beta, +1 is alpha, 0 is both!
        ! if spin is not present, default is both!
        hel = h_cast(0.0_dp)

        ! k_sym is actually always present..
        ! work on the newest, hopefully correct way to do this..
        ! i need -s k vector for the triples contribution to the doubles..
        if (t_sign_) then
            sgn = -1
        else
            sgn = 1
        end if

        if (sgn == 1) then
            ASSERT(spin == -1 .or. spin == 1)
            if (spin == -1) then
                do i = 1, nel
                    if (is_beta(nI(i))) then
                        sym = SymTable(G1(nI(i))%sym%s, k_sym%s)
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            else if (spin == 1) then
                do i = 1, nel
                    if (is_alpha(nI(i))) then
                        sym = SymTable(G1(nI(i))%sym%s, k_sym%s)
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            end if
        else if (sgn == -1) then
            ASSERT(spin == -1 .or. spin == 1)
            if (spin == -1) then
                do i = 1, nel
                    if (is_beta(nI(i))) then
                        sym = SymTable(k_sym%s, SymConjTab(G1(nI(i))%sym%s))
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            else if (spin == 1) then
                do i = 1, nel
                    if (is_alpha(nI(i))) then
                        sym = SymTable(k_sym%s, SymConjTab(G1(nI(i))%sym%s))
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            end if
        end if

    end function get_one_body_diag_sym

    function get_one_body_diag_kvec(nI, spin, k_shift, t_sign) result(hel)
        integer, intent(in) :: nI(nel)
        integer, intent(in) :: spin, k_shift(3)
        logical, intent(in), optional :: t_sign
        HElement_t(dp) :: hel

        integer :: i, sgn
#ifdef DEBUG_
        character(*), parameter :: this_routine = "get_one_body_diag_kvec"
#endif
        ! change this routine to also use just the symmetry symbols
        integer :: sym_shift
        type(symmetry) :: sym
        logical :: t_sign_
        def_default(t_sign_, t_sign, .false.)

        ! the spin input: -1 is beta, +1 is alpha, 0 is both!
        ! if spin is not present, default is both!
        hel = h_cast(0.0_dp)

        ! k_shift is actually always present..
        ! work on the newest, hopefully correct way to do this..
        ! i need -s k vector for the triples contribution to the doubles..
        if (t_sign_) then
            sgn = -1
        else
            sgn = 1
        end if

        sym_shift = lat%k_to_sym(k_shift(1), k_shift(2), k_shift(3))

        if (sgn == 1) then
            ASSERT(spin == -1 .or. spin == 1)
            if (spin == -1) then
                do i = 1, nel
                    if (is_beta(nI(i))) then
                        sym = SymTable(G1(nI(i))%sym%s, sym_shift)
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            else if (spin == 1) then
                do i = 1, nel
                    if (is_alpha(nI(i))) then
                        sym = SymTable(G1(nI(i))%sym%s, sym_shift)
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            end if
        else if (sgn == -1) then
            ASSERT(spin == -1 .or. spin == 1)
            if (spin == -1) then
                do i = 1, nel
                    if (is_beta(nI(i))) then
                        sym = SymTable(sym_shift, SymConjTab(G1(nI(i))%sym%s))
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            else if (spin == 1) then
                do i = 1, nel
                    if (is_alpha(nI(i))) then
                        sym = SymTable(sym_shift, SymConjTab(G1(nI(i))%sym%s))
                        hel = hel + epsilon_kvec(sym)
                    end if
                end do
            end if
        end if

    end function get_one_body_diag_kvec

    function get_offdiag_helement_k_sp_hub(nI, ex, tpar) result(hel)
        ! this routine is called for the double excitations in the
        ! k-space hubbard. in case of transcorrelation, this can also be
        ! spin-parallel excitations now. the triple excitation have a
        ! seperate routine!
        ! The result does not depend on nI!
        integer, intent(in) :: nI(nel), ex(2, 2)
        logical, intent(in) :: tpar
        HElement_t(dp) :: hel
        integer :: src(2), tgt(2), ij(2), ab(2), spin
        type(symmetry) :: k_sym_a, k_sym_b, k_sym_c, k_sym_d
        real(dp) :: sgn

        src = get_src(ex)
        tgt = get_tgt(ex)

        if (.not. t_trans_corr_2body) then
            if (same_spin(src(1), src(2)) .or. same_spin(tgt(1), tgt(2))) then
                hel = h_cast(0.0_dp)
                return
            end if
        else
            ! if src has same spin but tgt has opposite spin -> 0 mat ele
            if (same_spin(src(1), src(2)) .and. (.not. same_spin(tgt(1), tgt(2)) &
                                                 .or. .not. same_spin(src(1), tgt(1)))) then

                hel = h_cast(0.0_dp)
                return
            end if
        end if

        ij = get_spatial(src)
        ab = get_spatial(tgt)
        ! that about the spin?? must spin(a) be equal spin(i) and same for
        ! b and j? does this have an effect on the sign of the matrix element?

        ! in the case of 2-body transcorrelation, parallel spin double exciattions
        ! are possible todo: check if we get the coulomb and exchange contributions
        ! correct..

        ! the U part is still just the the spin-opposite part
        ! damn... i need a sign convention here too..
        if (same_spin(src(1), tgt(1)) .and. same_spin(src(2), tgt(2))) then
            hel = get_umat_kspace(ij(1), ij(2), ab(1), ab(2))
        else if (same_spin(src(1), tgt(2)) .and. same_spin(src(2), tgt(1))) then
            hel = -get_umat_kspace(ij(1), ij(2), ab(1), ab(2))
        end if

        ! if hel == 0, due to momentum conservation violation we can already
        ! exit here, since this means this excitation is just no possible!
        ! is hel only 0 due to momentum conservation?
        if (abs(hel) < EPS) return

        if (t_trans_corr) then
            ! do something
            ! here the one-body term with out (-t) is necessary

            ! optimized version:
            hel = hel * exp(trans_corr_param / 2.0_dp * &
                            (epsilon_kvec(G1(src(1))%Sym) + epsilon_kvec(G1(src(2))%Sym) &
                             - epsilon_kvec(G1(tgt(1))%Sym) - epsilon_kvec(G1(tgt(2))%Sym)))
        end if

        if (t_trans_corr_2body) then
            ! i need the k-vector of the transferred momentum..
            ! i am not sure if the orbitals involved in ex() are every
            ! re-shuffled.. if yes, it is not so easy in the spin-parallel
            ! case to reobtain the transferred momentum. although it must be
            ! possible. for now just assume (ex(2,2)) is the final orbital b
            ! with momentum k_i + k_j - k_a and we need the
            ! k_j - k_a momentum

            if (same_spin(src(1), src(2))) then
                spin = get_spin_pn(src(1))
                ! we need the spin of the excitation here if it is parallel

                ! in the same-spin case, this is the only contribution to the
                ! matrix element
                ! and maybe i have to take the sign additionally into
                ! account here?? or is this taken care of with tpar??

                ! thanks to Manu i have figured it out. we have to take
                ! the momentum between the to equally possible excitations:
                ! c^+_b c^+_a c_q c_p with W(q-a)
                ! and
                !-c^+_b c^+_a c_p c_q with W(p-a)
                ! with one of the orbital spins.
                ! i think it doesnt matter, which one.
                ! although for the sign it maybe does.. check thate
                ! TODO: i am not sure about the sign here...
                ! with a + i get nice symmetric results.. but i am really
                ! not sure damn.. ask ALI!
                ! i have to define an order of the input!
                ! maybe only look at i < j and a < b, as in the rest of the
                ! code! and then take the symmetrized matrix element

                src = [minval(src), maxval(src)]
                tgt = [minval(tgt), maxval(tgt)]

                k_sym_a = SymTable(G1(src(1))%sym%s, SymConjTab(G1(tgt(1))%sym%s))
                k_sym_b = SymTable(G1(src(1))%sym%s, SymConjTab(G1(tgt(2))%sym%s))

                ! yes this is it below! i just have to be sure that src and
                ! tgt are ordered.. we need a convention for these matrix
                ! elements!
                spin = get_spin_pn(src(1))

                hel = (same_spin_transcorr_factor(nI, k_sym_a, spin) &
                       - same_spin_transcorr_factor(nI, k_sym_b, spin))! &

            else
                ! else we need the opposite spin contribution

                ! the two-body contribution needs two k-vector inputs.
                ! figure out what momentum is necessary there!
                ! i need the transferred momentum
                ! and the momentum of other involved electron
                ! which by definition of k, is always ex(1,2) todo:
                ! check if this works as intented
                ! TODO no it is not!! I have to get the signed contribution
                ! here correct.. order in EX is not ensured!
                ! see above for same-spin excitations!
                ! what is k-vec now??
                ! this seems to have the correct symmetry..
                ! todo.. still the check if i need 1/2 factor or smth..
                ! and not sure about the sign between those two..
                ! here i am still not sure why i need two factors..
                ! i think i could get away with a convention, which momentum
                ! to take depending on the spin.. or i just symmetrize it..
                ! which hopefully is ok..
                ! because if i put it like that with k and -k it apparently
                ! cancels..
                ! maybe i also need a convention of an ordered input of ex..

                sgn = 1.0_dp
                ! also adapt this two body factor.. i hope this is correct now
                if (same_spin(src(1), tgt(1))) then
                    ! i need the right hole-momenta
                    k_sym_c = G1(tgt(1))%sym
                    k_sym_d = G1(tgt(2))%sym
                    sgn = 1.0_dp
                else
                    k_sym_c = G1(tgt(2))%sym
                    k_sym_d = G1(tgt(1))%sym
                    sgn = -1.0_dp
                end if

                hel = hel + sgn * (two_body_transcorr_factor(G1(src(1))%sym, k_sym_c) &
                                   + two_body_transcorr_factor(G1(src(2))%sym, k_sym_d))

                ! and now the 3-body contribution:
                ! which also needs the third involved mometum, which then
                ! again is ex(1,1)
                ! todo.. figure out spins!
                ! also check that! which electron momentum one has to take!
                ! maybe this cancels in the end.. who knows..

                ! what should i take as the spin here?? electron 1 or 2?
                ! i have to account for the sum of both possible spin
                ! influences!! damn.. todo!
                ! and this then determines which momentum i have to take.. or?

                hel = hel + sgn * (three_body_transcorr_fac(nI, G1(src(1))%sym, &
                                                            G1(src(2))%sym, k_sym_c, get_spin_pn(src(1))) &
                                   + three_body_transcorr_fac(nI, G1(src(2))%sym, &
                                                              G1(src(1))%sym, k_sym_d, get_spin_pn(src(2))))

            end if
        end if

        if (tpar) hel = -hel

    end function get_offdiag_helement_k_sp_hub

    subroutine setup_k_total(nI)
        integer, intent(in), optional :: nI(nel)
        character(*), parameter :: this_routine = "setup_k_total"

        integer :: i

        if (present(nI)) then

            ktotal = 0

            do i = 1, nel
                kTotal = lat%add_k_vec(kTotal, G1(nI(i))%k)
            end do

            if (.not. t_k_space_hubbard) then
                call MomPbcSym(kTotal, nBasisMax)
            end if

        else
            ! do i based on the HF det!
            call stop_all(this_routine, "not yet implemented")
        end if

    end subroutine setup_k_total

    ! finally write the functions to setup up the pesky G1 and nBasisMax
    ! quantities to be consistent with the rest of the old code
    subroutine setup_k_space_hub_sym(in_lat)
        class(lattice), intent(in), optional :: in_lat
        character(*), parameter :: this_routine = "setup_k_space_hub_sym"

        INTEGER :: i, j, SymInd, Lab, spin, sym0
        INTEGER, ALLOCATABLE :: Temp(:)
        ! These are for the hubbard and UEG model look-up table
        type(Symmetry) :: SymProduct, SymI, SymJ

        if (present(in_lat)) then
            if (.not. associated(G1)) then
                call setup_g1(in_lat)
            end if
            if (all(nBasisMax == 0)) then
                call setup_nbasismax(in_lat)
            end if

            ! do only the necessary setup here!
            ! this is essentially from symrandexcit2.F90 SpinOrbSymSetup()

            ElecPairs = (NEl * (NEl - 1)) / 2
            MaxABPairs = (nBasis * (nBasis - 1) / 2)

            ScratchSize = 2 * nSymLabels

            if (allocated(SpinOrbSymLabel)) deallocate(SpinOrbSymLabel)

            allocate(SpinOrbSymLabel(nBasis))
            do i = 1, nBasis
                !This ensures that the symmetry labels go from 0 -> nSymLabels-1
                SpinOrbSymLabel(i) = SymClasses(((i + 1) / 2)) - 1
            end do
#ifdef DEBUG_
            write(stdout, *) "SpinOrbSymLabel: "
            do i = 1, nBasis
                write(stdout, *) i, SpinOrbSymLabel(i)
            end do
#endif
            if (allocated(SymTableLabels)) deallocate(SymTableLabels)

            allocate(SymTableLabels(0:nSymLabels - 1, 0:nSymLabels - 1))
            SymTableLabels(:, :) = -9000    !To make it easier to track bugs
            do i = 0, nSymLabels - 1
                do j = 0, i
                    SymI = SymLabels(i + 1)        !Convert to the other symlabel convention to use SymLabels -
                    !TODO: I will fix this to make them consistent when working (ghb24)!
                    SymJ = SymLabels(j + 1)
                    SymProduct = SymProd(SymI, SymJ)
                    !Now, we need to find the label according to this symmetry!
                    !Run through all symmetries to make working (this could be far more efficient, but its only once, so sod it...
                    do Lab = 1, nSymLabels
                        if (SymLabels(Lab)%S == SymProduct%S) then
                            EXIT
                        end if
                    end do
                    if (Lab == nSymLabels + 1) then
                        call stop_all("SpinOrbSymSetup", "Cannot find symmetry label")
                    end if
                    SymTableLabels(i, j) = Lab - 1
                    SymTableLabels(j, i) = Lab - 1
                end do
            end do
#ifdef DEBUG_
            write(stdout, *) "SymTable:"
            do i = 0, nSymLabels - 1
                do j = 0, nSymLabels - 1
                    write(stdout, "(I6)", advance='no') SymTableLabels(i, j)
                end do
                write(stdout, *) ""
            end do
#endif

            !SymInvLabel takes the label (0 -> nSymLabels-1) of a spin orbital, and returns the inverse symmetry label, suitable for
            !use in ClassCountInd.
            if (allocated(SymInvLabel)) deallocate(SymInvLabel)
            allocate(SymInvLabel(0:nSymLabels - 1))
            SymInvLabel = -999

            ! Dongxia changes the gamma point away from center.
            ! SDS: Provide a default sym0 for cases where this doesn't apply
            sym0 = 0
            do i = 1, nsymlabels
                if (symlabels(i)%s == 0) sym0 = i - 1
            end do

            do i = 0, nSymLabels - 1
                ! Change the sym label back to the representation used by the rest
                ! of the code, use SymConjTab, then change back to other rep of
                ! labels SymConjTab only works when all irreps are self-inverse.
                ! Therefore, instead, we will calculate the inverses by just
                ! finding the symmetry which will give A1.
                do j = 0, nSymLabels - 1
                    ! Run through all labels to find what gives totally symmetric
                    ! rep
                    if (SymTableLabels(i, j) == sym0) then
                        if (SymInvLabel(i) /= -999) then
                            write(stdout, *) "SymLabel: ", i
                            call stop_all(this_routine, &
                                          "Multiple inverse irreps found - error")
                        end if
                        ! This is the inverse
                        SymInvLabel(i) = j
                    end if
                end do
                if (SymInvLabel(i) == -999) then
                    write(stdout, *) "SymLabel: ", i
                    call stop_all(this_routine, "No inverse symmetry found - error")
                end if
            end do
#ifdef DEBUG_
            write(stdout, *) "SymInvLabel: "
            do i = 0, nSymLabels - 1
                write(stdout, *) i, SymInvLabel(i)
            end do
#endif

            !SymLabelList2 and SymLabelCounts2 are now organised differently, so that it is more efficient, and easier to add new symmetries.
            !SymLabelCounts is of size (2,ScratchSize), where 1,x gives the index in SymlabelList2 where the orbitals of symmetry x start.
            !SymLabelCounts(2,x) tells us the number of orbitals of spin & symmetry x there are.

            !Therefore, if you want to run over all orbitals of a specific symmetry, you want to run over
            !SymLabelList from SymLabelCounts(1,sym) to SymLabelCounts(1,sym)+SymLabelCounts(2,sym)-1

            if (allocated(SymLabelList2)) deallocate(SymLabelList2)
            if (allocated(SymLabelCounts2)) deallocate(SymLabelCounts2)

            allocate(SymLabelList2(nBasis))
            allocate(SymLabelCounts2(2, ScratchSize))
            SymLabelList2(:) = 0          !Indices:   spin-orbital number
            SymLabelCounts2(:, :) = 0      !Indices:   index/Number , symmetry(inc. spin)
            allocate(Temp(ScratchSize))

            do j = 1, nBasis
                IF (G1(j)%Ms == 1) THEN
                    Spin = 1
                ELSE
                    Spin = 2
                end if
                SymInd = ClassCountInd(Spin, SpinOrbSymLabel(j), G1(j)%Ml)
                SymLabelCounts2(2, SymInd) = SymLabelCounts2(2, SymInd) + 1
            end do
            SymLabelCounts2(1, 1) = 1
            do j = 2, ScratchSize
                SymLabelCounts2(1, j) = SymLabelCounts2(1, j - 1) + SymLabelCounts2(2, j - 1)
            end do
            Temp(:) = SymLabelCounts2(1, :)
            do j = 1, nBasis
                IF (G1(j)%Ms == 1) THEN
                    Spin = 1
                ELSE
                    Spin = 2
                end if
                SymInd = ClassCountInd(Spin, SpinOrbSymLabel(j), G1(j)%Ml)
                SymLabelList2(Temp(SymInd)) = j
                Temp(SymInd) = Temp(SymInd) + 1
            end do

            Deallocate(Temp)

            if (allocated(OrbClassCount)) deallocate(OrbClassCount)
            allocate(OrbClassCount(ScratchSize))
            OrbClassCount(:) = 0
            do i = 1, nBasis
                IF (G1(i)%Ms == 1) THEN
                    OrbClassCount(ClassCountInd(1, SpinOrbSymLabel(i), G1(i)%Ml)) = &
                    & OrbClassCount(ClassCountInd(1, SpinOrbSymLabel(i), G1(i)%Ml)) + 1
                ELSE
                    OrbClassCount(ClassCountInd(2, SpinOrbSymLabel(i), G1(i)%Ml)) = &
                    & OrbClassCount(ClassCountInd(2, SpinOrbSymLabel(i), G1(i)%Ml)) + 1
                end if
            end do

            call setup_kPointToBasisFn(in_lat)

            call gensymstatepairs(nbasis / 2, .false.)

        else
            call Stop_All(this_routine, "not yet implemented")
        end if

    end subroutine setup_k_space_hub_sym

    subroutine setup_g1(in_lat)
        use SystemData, only: G1
        class(lattice), intent(in), optional :: in_lat
        character(*), parameter :: this_routine = "setup_g1"

        integer :: i

        ! i think everything is in the System_neci file
        if (present(in_lat)) then
            ! only do it if G1 has not been setup yet!
            if (.not. associated(G1)) then
                ! i need number of spin-orbitals
                allocate(G1(in_lat%get_nsites() * 2))
                G1 = NullBasisFn

                ! should i rely on the already setup nBasisMax?
                if (all(nBasisMax == 0)) then
                    call setup_nbasismax(in_lat)
                end if

                ! do a new setup of the G1..
                do i = 1, in_lat%get_nsites()
                    G1(2 * i - 1)%k = in_lat%get_k_vec(i)
                    G1(2 * i - 1)%ms = -1
                    ! can i already write the symmetry representation in here?
                    ! i guess so..
                    G1(2 * i - 1)%Sym = Symmetry(i)

                    G1(2 * i)%k = in_lat%get_k_vec(i)
                    G1(2 * i)%ms = 1
                    G1(2 * i)%Sym = Symmetry(i)
                end do

                if (in_lat%is_k_space()) then
                    call setup_symmetry_table()

                else
                    ! also to the rest of the symmetry stuff here:
                    ! in case of real-space turn off symmetry completely:
                    call GenMolpSymTable(1, G1, in_lat%get_nsites() * 2)
                    ! and i have to redo the symmetry setting to 0
                    do i = 1, in_lat%get_nsites() * 2
                        G1(i)%sym%s = 0
                    end do
                end if

            end if
        else
            ! not yet implemented!
            call Stop_All(this_routine, "not yet implemented")
        end if

    end subroutine setup_g1

    subroutine setup_nbasismax(in_lat)
        class(lattice), intent(in), optional :: in_lat
        character(*), parameter :: this_routine = "setup_nbasismax"

        integer :: dummy_size
        ! thats a fucking pain in the ass.. i do not want to do that now!
        if (present(in_lat)) then
            if (all(nBasisMax == 0)) then
                ! only do smth if nbasismax was not changed yet

                ! whatever spin-polar means:
                if (TSPINPOLAR) then
                    nBasisMax(4, 1) = 1
                    nBasisMax(2, 3) = 1
                else
                    nBasisMax(4, 1) = -1
                    if (nBasisMax(2, 3) == 0) nBasisMax(2, 3) = 2
                end if

                if (t_k_space_hubbard) then
                    nBasisMax(1, 3) = 0
                else if (t_new_real_space_hubbard) then
                    nBasisMax(1, 3) = 4
                    nBasisMax(3, 3) = 0
                    nBasisMax(2, 3) = 1
                end if

                ! this is never explained:
                nBasisMax(4, 2) = 1

                return

                ! i should give lattice also a member type and a k-space flag..
                if (trim(in_lat%get_name()) == 'tilted') then
                    ! how do i get nmaxx and the rest effectively??
                    ! if it is tilted the nmax stuff is usualy 1 or??
                    call SETBASISLIM_HUBTILT(nBasisMax, 1, 1, 1, dummy_size, &
                                             in_lat%is_periodic(), in_lat%get_length(1), in_lat%get_length(2))
                else
                    call SETBASISLIM_HUB(nBasisMax, in_lat%get_length(1), &
                                         in_lat%get_length(2), in_lat%get_length(3), dummy_size, &
                                         in_lat%is_periodic(),.not. in_lat%is_k_space())
                end if
                if (thub .and. treal) then
                    ! apparently this allows integrals between different
                    ! spins: so in the transcorrelated hubbard this should
                    ! be changed also maybe?
                    nBasisMax(2, 3) = 1
                end if
                ASSERT(dummy_size == in_lat%get_nsites() * 2)
            end if
        else
            call Stop_All(this_routine, "not yet implemented")
        end if

    end subroutine setup_nbasismax

    subroutine setup_kPointToBasisFn(in_lat)
        class(lattice), intent(in), optional :: in_lat
        character(*), parameter :: this_routine = "setup_kPointToBasisFn"

        integer :: i, kmaxX, kminX, kmaxY, kminY, kminZ, kmaxZ, iSpinIndex

        if (present(in_lat)) then

            if (allocated(kPointToBasisFn)) return

            if (all(nBasisMax == 0)) then
                call setup_nbasismax(in_lat)
                call setup_g1(in_lat)
            end if

            if (.not. associated(G1)) then
                call setup_g1(in_lat)
            end if

            kmaxX = 0
            kminX = 0
            kmaxY = 0
            kminY = 0
            ! [W.D:] can we make this the same as in the UEG:
            kminZ = 0
            kmaxZ = 0
            do i = 1, 2 * in_lat%get_nsites()
                ! In the hubbard model with tilted lattice boundary conditions,
                ! it's unobvious what the maximum values of
                ! kx and ky are, so this should be found
                IF (G1(i)%k(1) > kmaxX) kmaxX = G1(i)%k(1)
                IF (G1(i)%k(1) < kminX) kminX = G1(i)%k(1)
                IF (G1(i)%k(2) > kmaxY) kmaxY = G1(i)%k(2)
                IF (G1(i)%k(2) < kminY) kminY = G1(i)%k(2)
                if (G1(i)%k(3) > kmaxz) kmaxz = g1(i)%k(3)
                if (G1(i)%k(3) < kminz) kminz = g1(i)%k(3)
            end do
            allocate(kPointToBasisFn(kminX:kmaxX, kminY:kmaxY, kminz:kmaxz, 2))
            !Init to invalid
            kPointToBasisFn = -1
            do i = 1, 2 * in_lat%get_nsites()
                ! iSpinIndex equals 1 for a beta spin (ms=-1), and 2 for an alpha spin (ms=1)
                iSpinIndex = (G1(i)%Ms + 1) / 2 + 1
                kPointToBasisFn(G1(i)%k(1), G1(i)%k(2), G1(i)%k(3), iSpinIndex) = i
            end do
        else
            call Stop_All(this_routine, "not yet implemented!")
        end if

    end subroutine setup_kPointToBasisFn

    ! create the necessary routines for the triple excitation in the
    ! 2-body transcorrelated k-space hamiltonian
    HElement_t(dp) function same_spin_transcorr_factor_kvec(nI, k_vec, spin)
        ! this is the term coming appearing in the spin-parallel
        ! excitations coming from the k = 0 triple excitation
        integer, intent(in) :: nI(nel), k_vec(N_DIM), spin

        same_spin_transcorr_factor_kvec = -three_body_prefac * ( &
                                          get_one_body_diag(nI, -spin, k_vec) + get_one_body_diag(nI, -spin, k_vec, .true.))

    end function same_spin_transcorr_factor_kvec

    ! create the necessary routines for the triple excitation in the
    ! 2-body transcorrelated k-space hamiltonian
    HElement_t(dp) function same_spin_transcorr_factor_ksym(nI, k_sym, spin)
        ! this is the term coming appearing in the spin-parallel
        ! excitations coming from the k = 0 triple excitation
        integer, intent(in) :: nI(nel), spin
        type(symmetry), intent(in) :: k_sym

        same_spin_transcorr_factor_ksym = -three_body_prefac * ( &
                                          get_one_body_diag(nI, -spin, k_sym) + get_one_body_diag(nI, -spin, k_sym, .true.))

    end function same_spin_transcorr_factor_ksym

    HElement_t(dp) function rpa_contrib_kvec(J, p, k, spin)
        ! gives the rpa contribution in the J-optimization
        real(dp), intent(in) :: J
        integer, intent(in) :: p(N_DIM), k(N_DIM), spin
#ifdef DEBUG_
        character(*), parameter :: this_routine = "rpa_contrib_kvec"
#endif
        integer :: q(N_DIM)

        q = lat%subtract_k_vec(p, k)

        ASSERT(spin == 1 .or. spin == -1)

        rpa_contrib_kvec = real(bhub, dp) * (cosh(J) - 1.0_dp) / real(omega, dp) * &
                           (n_opp(spin) - 1.0_dp) * (epsilon_kvec(p) + epsilon_kvec(q))

    end function rpa_contrib_kvec

    HElement_t(dp) function rpa_contrib_ksym(J, p, a, spin)
        ! same as above just with symmetry symbols instead of vectors
        ! BUT here i have to be careful to determine the substraction p - k
        ! already before calling this function! it is the k-symbol of the
        ! hole correspinding to p!
        real(dp), intent(in) :: J
        type(symmetry), intent(in) :: p, a
        integer, intent(in) :: spin
#ifdef DEBUG_
        character(*), parameter :: this_routine = "rpa_contrib_ksym"
#endif
        real(dp) :: n_opp_loc

        ASSERT(spin == -1 .or. spin == 1)

        if (spin == -1) then
            n_opp_loc = real(nOccAlpha, dp)
        else
            n_opp_loc = real(nOccBeta, dp)
        end if

        rpa_contrib_ksym = -2.0_dp * real(bhub, dp) * (cosh(J) - 1.0_dp) / real(omega, dp) * &
                           n_opp_loc * (epsilon_kvec(p) + epsilon_kvec(a))

    end function rpa_contrib_ksym

    HElement_t(dp) function two_body_contrib_kvec(J, p, k)
        ! two body contribution for the J optimization!
        real(dp), intent(in) :: J
        integer, intent(in) :: p(N_DIM), k(N_DIM)

        integer :: q(N_DIM)

        q = lat%subtract_k_vec(p, k)

        ! i still have to decide how to loop over the HF.. maybe i dont
        ! double count and then i dont need the /2 here!
        two_body_contrib_kvec = real(uhub, dp) / 2.0_dp - real(bhub, dp) * &
                                ((exp(J) - 1.0_dp) * epsilon_kvec(p) + (exp(-J) - 1.0) * epsilon_kvec(q))

    end function two_body_contrib_kvec

    HElement_t(dp) function two_body_contrib_ksym(J, p, a)
        ! same as above just with symmetry symbols
        ! AND: we have  to do the p - k before calling this function!
        real(dp), intent(in) :: J
        type(symmetry), intent(in) :: p, a

        if (.not. t_symmetric) then
            two_body_contrib_ksym = real(uhub, dp) / 2.0_dp + real(bhub, dp) * &
                                    ((exp(J) - 1.0_dp) * epsilon_kvec(a) + (exp(-J) - 1.0) * epsilon_kvec(p))
        else
            two_body_contrib_ksym = real(uhub, dp) / 2.0_dp + real(bhub, dp) * &
                                    ((exp(J) - 1.0_dp) * epsilon_kvec(a) + (exp(-J) - 1.0) * epsilon_kvec(p))
        end if

    end function two_body_contrib_ksym

    HElement_t(dp) function exchange_contrib_kvec(nI, J, p, q, k, spin)
        ! the 3-body exchange contribution for the J optimization
        integer, intent(in) :: nI(:), p(N_DIM), q(N_DIM), k(N_DIM), spin
        real(dp), intent(in) :: J
#ifdef DEBUG_
        character(*), parameter :: this_routine = "exchange_contrib_kvec"
#endif
        integer :: k1(N_DIM), k2(N_DIM)

        ASSERT(spin == -1 .or. spin == 1)

        k1 = lat%add_k_vec(p, q)
        k2 = lat%subtract_k_vec(p, q)
        k2 = lat%subtract_k_vec(k2, k)

        exchange_contrib_kvec = -2.0_dp * real(bhub, dp) * (cosh(J) - 1.0_dp) / real(omega, dp) &
                                * (get_one_body_diag(nI, -spin, k1, .true.) + get_one_body_diag(nI, -spin, k2, .true.))

    end function exchange_contrib_kvec

    HElement_t(dp) function exchange_contrib_ksym(nI, J, p, q, a, spin)
        ! sym-symbol version of above!
        ! BUT here: p and q are the symbols of the electrons and q is the
        ! symbol of 1 hole! so i have to call this function also for the
        ! exchanged version!
        integer, intent(in) :: nI(:), spin
        real(dp), intent(in) :: J
        type(symmetry), intent(in) :: p, q, a
#ifdef DEBUG_
        character(*), parameter :: this_routine = "exchange_contrib_ksym"
#endif
        type(symmetry) :: k1, k2

        ASSERT(spin == -1 .or. spin == 1)

        k1 = SymTable(p%s, q%s)
        ! the subtraction has something to do with the inputted spin!!
        ! todo
        ! spin is chosen from p momentum! so a is the p-k = a hole
        ! and we need the dipersion of p-k - q = a - q
        k2 = SymTable(a%s, SymConjTab(q%s))

        exchange_contrib_ksym = 2.0_dp * real(bhub, dp) * (cosh(J) - 1.0_dp) / real(omega, dp) &
                                * (get_one_body_diag(nI, -spin, k1, .true.) + get_one_body_diag(nI, -spin, k2))

    end function exchange_contrib_ksym

    HElement_t(dp) function two_body_transcorr_factor_kvec(p, k)
        integer, intent(in) :: p(N_DIM), k(N_DIM)

        ! take out the part with U/2 since this is already covered in the
        ! "normal" matrix elements
        two_body_transcorr_factor_kvec = real(bhub, dp) / real(omega, dp) * ( &
                                         (exp(trans_corr_param_2body) - 1.0_dp) * epsilon_kvec(k) + &
                                         (exp(-trans_corr_param_2body) - 1.0_dp) * epsilon_kvec(p))

    end function two_body_transcorr_factor_kvec

    subroutine init_two_body_trancorr_fac_matrix()
        integer :: i, j
        type(symmetry) :: sym_i, sym_j

        ! for more efficiency, precompute the two-body factor for all possible
        ! symmetry symbols
        if (allocated(two_body_transcorr_factor_matrix)) deallocate(two_body_transcorr_factor_matrix)

        allocate(two_body_transcorr_factor_matrix(nBasis / 2, nBasis / 2), source=0.0_dp)

        ! loop over spatial orbitals
        do i = 1, nBasis / 2
            sym_i = G1(2 * i)%sym
            do j = 1, nBasis / 2
                sym_j = G1(2 * j)%Sym
                two_body_transcorr_factor_matrix(sym_j%s, sym_i%s) = &
                    bhub / omega &
                    * ((exp(trans_corr_param_2body) - 1.0_dp) * epsilon_kvec(sym_i) &
                        + (exp(-trans_corr_param_2body) - 1.0_dp) * epsilon_kvec(sym_j))
            end do
        end do

    end subroutine init_two_body_trancorr_fac_matrix

    HElement_t(dp) function two_body_transcorr_factor_ksym(p, k)
        type(symmetry), intent(in) :: p, k

        ! take out the part with U/2 since this is already covered in the
        ! "normal" matrix elements

        ! optimize this better and precompute more stuff!

        two_body_transcorr_factor_ksym = two_body_transcorr_factor_matrix(p%s, k%s)

    end function two_body_transcorr_factor_ksym

    HElement_t(dp) function three_body_transcorr_fac_kvec(nI, p, q, k, spin)
        integer, intent(in) :: nI(nel), p(N_DIM), q(N_DIM), k(N_DIM), spin
#ifdef DEBUG_
        character(*), parameter :: this_routine = "three_body_transcorr_fac_kvec"
#endif
        integer :: k1(3), k2(3)

        ASSERT(spin == 1 .or. spin == -1)

        ! update: add k-vec to not leave first BZ
        k1 = lat%subtract_k_vec(k, q)
        k2 = lat%add_k_vec(p, q)
        three_body_transcorr_fac_kvec = -three_body_prefac * ( &
                                        n_opp(spin) * (epsilon_kvec(p) + epsilon_kvec(k)) - ( &
                                        get_one_body_diag(nI, -spin, k1) + get_one_body_diag(nI, -spin, k2, .true.)))

    end function three_body_transcorr_fac_kvec

    HElement_t(dp) function three_body_transcorr_fac_ksym(nI, p, q, k, spin)
        integer, intent(in) :: nI(nel), spin
        type(symmetry) :: p, q, k
#ifdef DEBUG_
        character(*), parameter :: this_routine = "three_body_transcorr_fac_ksym"
#endif
        type(symmetry) :: k1, k2
        real(dp) :: n_opp_loc

        ASSERT(spin == 1 .or. spin == -1)

        if (spin == -1) then
            n_opp_loc = real(nOccAlpha, dp)
        else
            n_opp_loc = real(nOccBeta, dp)
        end if

        k1 = SymTable(k%s, SymConjTab(q%s))
        k2 = SymTable(p%s, q%s)

        three_body_transcorr_fac_ksym = three_body_const_mat(p%s, k%s, spin) + &
                                        three_body_prefac * (get_one_body_diag(nI, -spin, k1) + get_one_body_diag(nI, -spin, k2, .true.))

    end function three_body_transcorr_fac_ksym

    subroutine init_three_body_const_mat()
        integer :: i, j
        type(symmetry) :: sym_i, sym_j

        if (allocated(three_body_const_mat)) deallocate(three_body_const_mat)
        allocate(three_body_const_mat(nBasis / 2, nBasis / 2, -1:1), source=0.0_dp)

        do i = 1, nBasis / 2
            sym_i = G1(2 * i)%Sym
            do j = 1, nBasis / 2
                sym_j = G1(2 * j)%Sym
                three_body_const_mat(sym_i%s, sym_j%s, -1) = &
                    -three_body_prefac &
                        * n_opp(-1) &
                        * real(epsilon_kvec(sym_i) + epsilon_kvec(sym_j), dp)

                three_body_const_mat(sym_i%s, sym_j%s, 1) = &
                    -three_body_prefac &
                        * n_opp(1) &
                        * real(epsilon_kvec(sym_i) + epsilon_kvec(sym_j), dp)
            end do
        end do

    end subroutine init_three_body_const_mat

    function get_3_body_helement_ks_hub(ex, tpar) result(hel)
        ! the 3-body matrix element.. here i have to be careful about
        ! the sign and stuff.. and also if momentum conservation is
        ! fullfilled ..
        integer, intent(in) ::  ex(2, 3)
        logical, intent(in) :: tpar
        HElement_t(dp) :: hel
        integer :: ms_elec, ms_orbs, opp_elec, opp_orb, par_elecs(2), par_orbs(2)
        logical :: sgn
        type(symmetry) :: p_sym, hole_sym, k_sym, k1_sym, k2_sym
        type(symmetry) :: ka_sym, kb_sym, kc_sym, kd_sym

        hel = h_cast(0.0_dp)

        ms_elec = sum(get_spin_pn(ex(1, :)))
        ms_orbs = sum(get_spin_pn(ex(2, :)))

        ! check spin:
        if (.not. (ms_elec == ms_orbs)) return
        if (.not. (ms_elec == 1 .or. ms_elec == -1)) return

        ! check momentum conservation:
        if (.not. check_momentum_sym(ex(1, :), ex(2, :))) return

        ! i have to get the correct momenta for the epsilon contribution
        ! see the sheets for the k-vec relations.
        ! we need the momentum p + (s-b) or variations thereof..
        ! p, we know, since it is the momentum of the minority spin-electron
        ! and (s-b) is the difference of an majority spin electron and the
        ! fitting hole, so the total momentum conservation a + b + c = s + p + q
        ! is fulfilled
        ! the same ofc is a + (c-q)
        ! is the minority hole always in ex(2,1)? otherwise we have to find it
        ! it is not!

        ! i think i have figured it out with the help of Manu
        ! the k-vector of the minority spin is always involved
        ! but of the electron.. or can we transform it?
        ! any way we have to calculate
        ! W(k_p + k_s - k_b) - W(k_p + k_q - k_b)
        ! for this we have to figure out what the minority and majority
        ! electrons are!
        opp_elec = find_minority_spin(ex(1, :))

        ! although i really can't be sure about the minority whole always
        ! being at the first position in ex(2,:)..
        opp_orb = find_minority_spin(ex(2, :))

        p_sym = G1(opp_elec)%sym
        hole_sym = G1(opp_orb)%sym

        par_elecs = pack(ex(1, :), ex(1, :) /= opp_elec)
        par_orbs = pack(ex(2, :), ex(2, :) /= opp_orb)

        k_sym = SymTable(p_sym%s, hole_sym%s)

        ! we have to define an order here too
        par_elecs = [minval(par_elecs), maxval(par_elecs)]
        par_orbs = [minval(par_orbs), maxval(par_orbs)]

        ! BZ conserving addition:
        k1_sym = SymTable(G1(par_orbs(1))%sym%s, SymConjTab(G1(par_elecs(1))%sym%s))
        k2_sym = SymTable(G1(par_orbs(1))%sym%s, SymConjTab(G1(par_elecs(2))%sym%s))

        ! need to do the correct k additions!
        ! for some reason the compiler does not recognize the output of
        ! add_k_vec and subtract_k_vec as a vector...
        ! so do it intermediately
        ka_sym = SymTable(hole_sym%s, k1_sym%s)
        kb_sym = SymTable(hole_sym%s, k2_sym%s)
        kc_sym = SymTable(k1_sym%s, SymConjTab(p_sym%s))
        kd_sym = SymTable(k2_sym%s, SymConjTab(p_sym%s))

        hel = three_body_prefac * ( &
              epsilon_kvec(ka_sym) - epsilon_kvec(kb_sym) + &
              epsilon_kvec(kc_sym) - epsilon_kvec(kd_sym))

        ! i have to decide on a sign here depending on the order of the
        ! operators.. todo!
        sgn = get_3body_sign(ex)

        if (.not. sgn) hel = -hel

        if (tpar) hel = -hel

    end function get_3_body_helement_ks_hub

    logical function get_3body_sign(ex)
        ! i need to find some sign convention on the 3-body term, depending
        ! on the spin of the involved orbitals
        integer, intent(in) :: ex(2, 3)

        integer :: src(3), tgt(3), i, elec_pos, orb_pos

        ! we also have to define an order of the parallel spins..
        ! or is this ensured? i guess it should..
        src = get_src(ex)
        tgt = get_tgt(ex)

        if (sum(get_spin_pn(src)) == -1) then
            ! then alpha is the opposite spin
            do i = 1, 3
                if (is_alpha(src(i))) elec_pos = i
                if (is_alpha(tgt(i))) orb_pos = i
            end do

        else
            ! otherwise beta is minority
            do i = 1, 3
                if (is_beta(src(i))) elec_pos = i
                if (is_beta(tgt(i))) orb_pos = i
            end do

        end if

        if (elec_pos == orb_pos .or. abs(elec_pos - orb_pos) == 2) then
            get_3body_sign = .false.
        else
            get_3body_sign = .true.
        end if

    end function get_3body_sign

    logical function check_momentum_sym(elecs, orbs)
        ! routine to check the momentum conservation for double and triple
        ! spawns
        ! although this could in fact be used for a general check of
        ! symmetry adaptability
        integer, intent(in) :: elecs(:), orbs(:)
#ifdef DEBUG_
        character(*), parameter :: this_routine = "check_momentum_sym"
#endif
        integer :: i
        type(Symmetry) :: sym_1, sym_2

        ASSERT(size(elecs) == size(orbs))

        ! make this more efficient here! and do not use all the old
        ! functionality

        check_momentum_sym = .true.

        if (.not. sum(get_spin_pn(elecs)) == sum(get_spin_pn(orbs))) then
            check_momentum_sym = .false.
            return
        end if

        ! get the symmetry symbol
        sym_1 = G1(elecs(1))%Sym
        sym_2 = G1(orbs(1))%Sym
        do i = 2, size(elecs)
            ! i could ofc also use G1 here again
            sym_1 = SymTable(sym_1%s, G1(elecs(i))%sym%s)
            sym_2 = SymTable(sym_2%s, G1(orbs(i))%sym%s)
        end do

        if (sym_1%s /= sym_2%s) then
            check_momentum_sym = .false.
        end if

    end function check_momentum_sym

    subroutine make_triple(nI, nJ, elecs, orbs, ex, tPar)
        integer, intent(in) :: nI(nel), elecs(3), orbs(3)
        integer, intent(out) :: nJ(nel), ex(2, 3)
        logical, intent(out) :: tPar
#ifdef DEBUG_
        character(*), parameter :: this_routine = "make_triple"
#endif
        integer :: sort_elecs(3), sort_orbs(3), src(3), pos_moved, k, i

        ! i should also check if this excitation is actually possible!
        ! and which spin i move to where or??

        ! figure out how to do triples efficiently..
        ! can we do a single and then a double?

        ! NO: talk to Manu and do this with integer representation! not
        ! with nI and nJ, since this can be done way more effective as
        ! via the occupied orbitals..

        ! TODO: thats a super strange convention, .. talk with Ali and
        ! Simon about that.. but for now accept it as it is..

        sort_elecs = sort_unique(elecs)
        sort_orbs = sort_unique(orbs)

        src = nI(sort_elecs)

        ex(1, :) = src
        ex(2, :) = sort_orbs

        nJ = nI

        ASSERT(sum(get_spin_pn(src)) == sum(get_spin_pn(orbs)))

        ! i should do some stuff depending on the order of src and tgt
        ! we have to check if electrons hop over other electrons, so we
        ! might have to change the indexing to adapt to the change in nJ!

        ! or check it individually:
        if (src(2) < sort_orbs(1)) then
            ! then i hops over j:
            sort_elecs(2) = sort_elecs(2) - 1
        end if
        if (src(3) < sort_orbs(1)) then
            ! then i hops over k
            ! (note: this also implies that j hops over k, but treat that
            ! seperately below, to also cover the case, where this if here
            ! is not fullfilled!)
            sort_elecs(3) = sort_elecs(3) - 1
        end if
        if (src(3) < sort_orbs(2)) then
            ! then j hops over k
            sort_elecs(3) = sort_elecs(3) - 1
        end if

        pos_moved = 0

        do k = 1, 3
            if (src(k) < sort_orbs(k)) then
                if (sort_elecs(k) == nel) then
                    ! this can only happen for k == 3
                    i = nel + 1
                    nJ(nel) = sort_orbs(k)
                else
                    do i = sort_elecs(k) + 1, nel
                        if (sort_orbs(k) < nJ(i)) then
                            nJ(i - 1) = sort_orbs(k)
                            exit
                        else
                            nJ(i - 1) = nJ(i)
                        end if
                    end do
                    if (i == nel + 1) then
                        nJ(nel) = sort_orbs(k)
                    end if
                end if
            else
                if (sort_elecs(k) == 1) then
                    i = 0
                    nJ(1) = sort_orbs(k)
                else
                    do i = sort_elecs(k) - 1, 1, -1
                        if (sort_orbs(k) > nJ(i)) then
                            nJ(i + 1) = sort_orbs(k)
                            exit
                        else
                            nJ(i + 1) = nJ(i)
                        end if
                    end do
                    if (i == 0) then
                        nJ(1) = sort_orbs(k)
                    end if
                end if
            end if

            pos_moved = pos_moved + sort_elecs(k) - i + 1

        end do

        tPar = btest(pos_moved, 0)

    end subroutine make_triple

    subroutine init_tmat_kspace(in_lat)
        ! similar to the real-space tmat setup also do this here based on
        ! the inputted lattice!
        class(lattice), optional :: in_lat
        character(*), parameter :: this_routine = "init_tmat_kspace"

        integer :: i

        if (present(in_lat)) then
            if (associated(tmat2d)) deallocate(tmat2d)

            allocate(tmat2d(nbasis, nbasis))
            tmat2d = 0.0_dp

            do i = 1, in_lat%get_nsites()
                tmat2d(2 * i - 1, 2 * i - 1) = bhub * in_lat%dispersion_rel_orb(i)
                tmat2d(2 * i, 2 * i) = bhub * in_lat%dispersion_rel_orb(i)
            end do

        else
            call Stop_All(this_routine, "not yet implemented!")
        end if

    end subroutine init_tmat_kspace

    subroutine setup_tmat_k_space(in_lat)
        ! routine which sets up the (diagonal) t-matrix in the k-space
        ! the dimensionality and connectivity of nearest and next-nearest
        ! neighbors influences that!
        class(lattice), intent(in), optional :: in_lat
        character(*), parameter :: this_routine = "setup_tmat_k_space"

        if (present(in_lat)) then
            if (all(nBasisMax == 0)) then
                call setup_nbasismax(in_lat)
            end if

            if (.not. associated(G1)) then
                call setup_g1(in_lat)
            end if
            ! else assume it is already setup correctly

            if (.not. associated(tmat2d)) then
                ! call the already implemented hubbard tmat calculator..
                ! for now only.. in the future we should do this standalone
                call CALCTMATHUB(in_lat%get_nsites() * 2, nBasisMax, bhub, &
                                 ttilt, G1,.not. in_lat%is_k_space(), in_lat%is_periodic())

            end if

        else
            call Stop_All(this_routine, "not yet implemented!")
        end if

    end subroutine setup_tmat_k_space

end module k_space_hubbard