UMatCache.F90 Source File


Contents

Source Code


Source Code

#include "macros.h"

MODULE UMatCache

    use constants, only: dp, sizeof_int, int64, stdout

    use SystemData, only: tROHF, tStoreSpinOrbs, tComplexWalkers_RealInts, &
                          Symmetry, BasisFN, UMatEps, tROHF

    use SystemData, only: tRIIntegrals, t_non_hermitian_2_body

    use util_mod, only: swap, get_free_unit, NECI_ICOPY, near_zero, operator(.div.), &
                        stop_all

    use sort_mod, only: sort

    use MemoryManager, only: TagIntType, LogMemAlloc, LogMemDealloc

    use HElem, only: HElement_t_size

    use CPMDData, only: NKPS

    use sym_mod, only: TotSymRep, LSymSym

    use HElem, only: HElement_t_size

    use legacy_data, only: irat

    use procedure_pointers, only: get_umat_el

    use cpmdstub_mod, only: CPMDANTISYMINTEL

    use orb_idx_mod, only: get_spat

    better_implicit_none

    external :: ReadDalton2EIntegrals

    SAVE
    private
    public :: UMatCacheData, UMATLABELS, ntypes, NPAIRS, nSlots, &
        UMATCACHEFLAG, tSmallUMat, UmatInd, gtid, numBasisIndices, spatial, &
        DFCoeffs, iDFMethod, DFInts, nauxbasis, DFInvFitInts, nBasisPairs, &
        tagDFCoeffs, DFFitInts, tDFInts, GetUMATSize, UMAT2D, TUMAT2D, &
        tagdffitints, tagdfinvfitints, tagdfints, nStates, getcacheindexstates, &
        umatconj, cachefcidump, fillupcache, setupumatcache, SETUMATCACHEFLAG, &
        tagumat2d, setumattrans, setupumat2d, get_umat_el, umat2ind

    public :: ttransfindx, nhits, nmisses, getcachedumatel, haskpoints, transtable, &
        gen2cpmdints, setup_umatind, treadincache, &
        nmeminit, idumpcacheflag, nslotsinit, freezetransfer, createinvbrr, &
        setupumat2d_df, nstatesdump, dumpumatcache, destroyumatcache, &
        writeumatcachestats, freezeumatcache, createinvbrr2, &
        freezeumat2d, setupumattranstable, tdeferred_umat2d, setupumat2d_dense, &
        ttransgtid, nullumat



! Integrals are cached for CPMD and density fitting calculations, where we might
! not be able to store all possible integrals in the available memory.

! The integral cache is stored in UMatCacheData.
! For real systems, nTypes=1, and we have a single value for each unique ordered pair of ordered pairs (i,k), (j,l)
! out of <ij|u|kl> (where i,j,k,l are state labels).
! For complex systems, nTypes=2, giving two possible values.
! For each nPairs (which corresponds to the greater of the two pairs), there  are nSlots, with the other pair label in
! UMatLabels, and the values in UMatCache.
! nStates is the maximum number of states stored in the cache (which may not be all the states if there are frozen virtuals).

    HElement_t(dp), Pointer :: UMatCacheData(:, :, :) => null() !(0:nTypes-1,nSlots,nPairs)
    INTEGER, Pointer :: UMatLabels(:, :) => null() !(nSlots,nPairs)
    INTEGER :: nSlots, nPairs, nTypes
    INTEGER :: nStates
    integer :: nBI_umat

! tSmallUMat is set if we have nStates slots per pair for storing the <ik|jk> integrals.
! This should only be used prior to freezing to store precalculated integrals.
! For each pair (i,j), we store the <ik|jk> integral in slot k.
    LOGICAL tSmallUMat

!  iDumpCacheFlag: Dump the cache to disk if we're told to.
!    =0: No cache dumping.
!    =1: Dump cache unless we've read in a cache dump that's from a larger
!    calculation than the current one.
!    =2: Force dumping of cache (over-writing previous cache level).
!  nStatesDump: number of states used in calculation which produced dump file.
!  tReadInCache: does what it says on the tin.
    integer iDumpCacheFlag, nStatesDump
    logical tReadInCache

! For the more frequently used <ij|u|ij> and <ij|u|ji> integrals, we store them
! in a separate cache (if TUMat2D is true).
!     UMAT2D : Stores the integrals of the form <ij|ij> and <ij|ji>
!     <ij|ij> is stored in the upper diagaonal, <ij|ji> in the
!     off-diagonal elements of the lower triangle.
    HElement_t(dp), Pointer :: UMat2D(:, :) => null() !(nStates,nStates)
    HElement_t(dp), Pointer :: UMat3d(:, :, :) => null()
    HElement_t(dp), Pointer :: UMat3dExch(:, :, :) => null()
    LOGICAL :: tUMat2D, tDeferred_Umat2d

! This vector stores the energy ordering for each spatial orbital, which is the inverse of the BRR vector
! This is needed for the memory saving star indexing system.
! E.g. Element 4 will give the the order in the energy of element 4
    INTEGER, DIMENSION(:), POINTER :: INVBRR => null()
    INTEGER, DIMENSION(:), POINTER :: INVBRR2 => null()

!NOCC is number of occupied spatial orbitals - needed for test in UMATInd, thought would be quicker
!than passing it in each time.
!Freezetransfer is a temporary measure to tell UMATIND when the freezing of orbitals is occuring.
    INTEGER :: NOCC
    LOGICAL :: FREEZETRANSFER

! Book-keeping information
! nSlotsInit is the number of slots requested on input.  If the number required is less,
!then the lower value is allocated
! If nSlotsInit is set to 0, then general <ij|u|kl> element caching is not performed, but
!UMat2D <ij|u|ij> and <ij|u|ji> is.  For nSlotsInit=-1 neither is performed.
    INTEGER nSlotsInit, nMemInit

! UMatCacheFlag=0 is normal operation
! UMatCacheFlag=1 means cache from bottom up
! This is useful when lots of sequential pieces of data are being stored.
! When UMatCacheFlag is reset to 0, the data which are present are spread evenly around the slots for a given Pair.
    INTEGER UMatCacheFlag

! If the max vertex level is 2 or less, then we just need to calculate
! <ij|ab> and never need <ib|aj>, unless the integral is for a single
! excitation.
    LOGICAL gen2CPMDInts

! nHits and nMisses are the number of cache hits and misses.
    INTEGER :: nHits, nMisses
! The number of cache overwrites
    INTEGER :: iCacheOvCount

! Some various translation tables to convert between different orderings of states.
    LOGICAL :: tTransGTID, tTransFindx
    INTEGER, Pointer :: TransTable(:) => null() !(NSTATES)
    INTEGER, Pointer :: InvTransTable(:) => null() !(NSTATES)

! Density fitting cache information: for generating integrals on the fly from density fitting.
    integer nAuxBasis, nBasisPairs
    logical tDFInts
    real(dp), Pointer :: DFCoeffs(:, :) => null() !(nAuxBasis,nBasisPairs)
    real(dp), Pointer :: DFInts(:, :) => null() !(nAuxBasis,nBasisPairs)
    real(dp), Pointer :: DFFitInts(:, :) => null() !(nAuxBasis,nAuxBasis)
    real(dp), Pointer :: DFInvFitInts(:, :) => null() !(nAuxBasis,nAuxBasis)
    INTEGER iDFMethod
!Some possible DFMethods sums over P, Q implied.  All precontracted to run in order(X) except DFOVERLAP2NDORD
! 0 - no DF
! DFOVERLAP        1 - (ij|u|ab)= (ij|u|P)(P|ab)
! DFOVERLAP2NDORD  2 - (ij|u|ab)= (ij|u|P)(P|ab)+(ij|P)(P|u|ab)-(ij|P)(P|u|Q)(Q|ab)
! DFOVERLAP2       3 - (ij|u|ab)= (ij|P)(P|u|Q)(Q|ab)
! DFCOULOMB        4 - (ij|u|ab)= (ij|u|P)[(P|u|Q)^-1](Q|u|ab)

    ! Memory book-keeping tags
    integer(TagIntType) :: tagUMatCacheData = 0
    integer(TagIntType) :: tagUMatLabels = 0
    integer(TagIntType) :: tagOUMatCacheData = 0
    integer(TagIntType) :: tagOUMatLabels = 0
    integer(TagIntType) :: tagUMat2D = 0
    ! [W.D]
    ! those two tags are also defined in OneEInts..
!       integer(TagIntType) :: tagTMat2D=0
!       integer(TagIntType) :: tagTMat2D2=0
    integer(TagIntType) :: tagTransTable = 0
    integer(TagIntType) :: tagInvTransTable = 0
    integer(TagIntType) :: tagDFCoeffs = 0
    integer(TagIntType) :: tagDFInts = 0
    integer(TagIntType) :: tagDFFitInts = 0
    integer(TagIntType) :: tagDFInvFitInts = 0
    integer(TagIntType) :: tagInvBRR = 0
    integer(TagIntType) :: tagInvBRR2 = 0

Contains

    SUBROUTINE CreateInvBRR2(BRR2, NBASIS)
        ! Create new INVBRR for the freezing process
        ! In:
        !    BRR(i)=j: orbital i is the j-th lowest in energy.
        !    nBasis: size of bais
        ! InvBRR is the inverse of BRR.  InvBRR(j)=i: the j-th lowest energy
        ! orbital corresponds to the i-th orbital in the original basis.
        INTEGER NBASIS
        INTEGER BRR2(NBASIS), ierr, I, t
        character(*), parameter :: t_r = 'CreateInvBRR2'

!        write(stdout,*) "================================"
!        write(stdout,*) "BRR2 is "
!        write(stdout,*) BRR2(:)

        allocate(INVBRR2(NBASIS / 2), STAT=ierr)
        CALL LogMemAlloc('INVBRR2', NBASIS / 2, 4, t_r, tagINVBRR2, ierr)
        INVBRR2(1:NBASIS / 2) = 0
        t = 0
        DO I = 2, NBASIS, 2
            t = t + 1
            INVBRR2(BRR2(I) / 2) = t
        end do

!        write(stdout,*) "================================"
!        write(stdout,*) "InvBRR2 is "
!        write(stdout,*) INVBRR2(:)


        RETURN
    END SUBROUTINE CreateInvBRR2

    SUBROUTINE CreateInvBRR(BRR, NBASIS)
        ! Create new INVBRR for the freezing process
        ! In:
        !    BRR(i)=j: orbital i is the j-th lowest in energy.
        !    nBasis: size of bais
        ! InvBRR is the inverse of BRR.  InvBRR(j)=i: the j-th lowest energy
        ! orbital corresponds to the i-th orbital in the original basis.
        INTEGER NBASIS
        INTEGER BRR(NBASIS), ierr, I, t
        character(*), parameter :: t_r = 'CreateInvBRR'

        IF (ASSOCIATED(INVBRR)) THEN
            CALL LogMemDealloc(t_r, tagINVBRR)
            DEallocate(INVBRR)
        end if
        allocate(INVBRR(NBASIS / 2), STAT=ierr)
        CALL LogMemAlloc('INVBRR', NBASIS / 2, 4, t_r, tagINVBRR, ierr)
        INVBRR(1:NBASIS / 2) = 0
        t = 0
        DO I = 2, NBASIS, 2
            t = t + 1
            INVBRR(BRR(I) / 2) = t
        end do
        RETURN

    END SUBROUTINE CreateInvBRR

    subroutine setup_UMatInd()
        use SystemData, only: nBasis

        nBI_umat = numBasisIndices(nBasis)
    end subroutine setup_UMatInd

    elemental function UMat2Ind(i, j, k, l) result(ind)
        use SystemData, only: nBasis
        integer, intent(in) :: i, j, k, l
        integer(int64) :: ind

        ind = UMatInd_base(i, j, k, l, numBasisIndices(nBasis))
    end function UMat2Ind

    elemental function UMatInd(i, j, k, l) result(ind)
        integer, intent(in) :: i, j, k, l
        integer(int64) :: ind

        ind = UMatInd_base(i, j, k, l, nBI_umat)
    end function UMatInd

    elemental FUNCTION UMatInd_base(I, J, K, L, nBI) result(UMatInd)
        ! Get the index of physical order UMAT element <IJ|KL>.
        ! Indices are internally reordered such that I>K, J>L,(I,K)>(J,L)
        ! Note: (i,k)>(j,l) := (k>l) || ((k==l)&&(i>j))
        ! In:
        !    I,J,K,L: orbital indices. These refer to spin orbitals in
        !      unrestricted calculations and spatial orbitals in restricted
        !      calculations.
        !    nBasis: size of basis. If =0, use nStates instead.
        !    nOccupied: # of occupied orbitals.  If =0, then nOcc is used.
        !    Should only be passed as non-zero during the freezing process.
        INTEGER, intent(in) :: I, J, K, L, nBI
        integer(int64) :: UMatInd
        integer :: A, B

        if (t_non_hermitian_2_body) then
            A = (I - 1) * nBi + K
            B = (J - 1) * nBi + L
        else
            !Combine indices I and K, ensuring I>K
            IF (I > K) THEN
                A = (I * (I - 1)) / 2 + K
            ELSE
                A = (K * (K - 1)) / 2 + I
            END IF

            !Combine indices J and L, ensuring J>L
            IF (J > L) THEN
                B = (J * (J - 1)) / 2 + L
            ELSE
                B = (L * (L - 1)) / 2 + J
            END IF
        end if

        !Combine (IK) and (JL) in a unique way  (k > l or if k = l then i > j)
        IF (A > B) THEN
            UMatInd = (int(A, int64) * int(A - 1, int64)) / 2 + int(B, int64)
        ELSE
            UMatInd = (int(B, int64) * int(B - 1, int64)) / 2 + int(A, int64)
        END IF
#ifdef CMPLX_
        if (.not. tComplexWalkers_RealInts) then
            UMatInd = (UmatInd - 1) * 2 + 1
            !We need to test whether we have swapped i and k or j and l independantly of each other
            !If we have done this, it is one of the 'other' integrals - add one.
            if (((I > K) .and. (J < L)) .or. ((I < K) .and. (J > L))) then
                UMatInd = UMatInd + 1
            end if
        end if
#endif
    END FUNCTION UMatInd_Base

    HElement_t(dp) elemental function UMatConj(I, J, K, L, val)
        integer, intent(in) :: I, J, K, L
        HElement_t(dp), intent(in) :: val
#ifdef CMPLX_
        INTEGER :: IDI, IDJ, IDK, IDL, NewA, A

        !Changing index ordering for the real ordering.
        IDI = I
        IDJ = J
        IDK = K
        IDL = L

        !First find rearranged indices.
        IF (idi < idk) then
            !swap idi and idk
            call swap(idi, idk)
        end if

        IF (idj < idl) then
            !swap idj and idl
            call swap(idj, idl)
        end if

        IF ((idl < idk) .or. ((idl == idk) .and. (idi < idj))) THEN
            !We would want to swap the (ik) and (jl) pair.
            call swap(idi, idj)
            call swap(idk, idl)
        end if

        !Indices now permuted to the real case ordering. Is this now the same integral?
        if (((I > K) .and. (J < L)) .or. ((I < K) .and. (J > L))) then
            !Type II integral - reducing to lowest ordering will give 'other'
            !integral, where one of (ik) and (jl) pairs have been swapped independantly.
            !If i = k, or j = l, we do not go through here.
            call swap(idi, idk)  !Revert back to the correct integral by swapping just i and k.
        end if

        !Want to see if the pairs of indices have swapped sides.
        !Make unique index from the ij and kl pairs
        IF (IDI > IDJ) THEN
            A = IDI * (IDI - 1) / 2 + IDJ
        ELSE
            A = IDJ * (IDJ - 1) / 2 + IDI
        end if

        !Create uniques indices from the original pairs of indices.
        !We only need to consider whether the (ij) pair has swapped sides, since
        !the <ij|ij> and <ij|ji> integrals are real by construction, and so we do not
        !need to consider what happens if the (ij) pair = (kl) pair.
        IF (I > J) THEN
            NewA = I * (I - 1) / 2 + J
        ELSE
            NewA = J * (J - 1) / 2 + I
        end if

        !Check whether pairs of indices have swapped sides.
        IF (NewA /= A) THEN
            UMatConj = CONJG(val) !Index pair i and j have swapped sides - take CC.
        ELSE
            UMatConj = val
        end if

#else
        integer :: tmp
        UMatConj = val

        ! Eliminate warnings
        tmp = i; tmp = j; tmp = k; tmp = l
#endif
    end function UMatConj

    SUBROUTINE GetUMatSize(nBasis, iSize)
        use SystemData, only: tStoreSpinOrbs
        ! Get the prospective size of a UMat (not a UMatCache) for completely
        ! storing FCIDUMP 2-e integrals
        ! In:
        !    nBasis: as above.
        !    iSS: ratio of spatial orbitals to spin orbitals.
        !         iSS=0 integrals not stored in UMAT.
        !         iSS=1 unrestricted calculation
        !         iSS=2 restricted calculation
        !         iSS=-1 flag for the Hubbard model.
        ! Out:
        !    iSize: size of UMAT.
        INTEGER nBasis, iSS
        INTEGER iPairs, nBi
        INTEGER(int64), intent(out) :: iSize
        IF (tStoreSpinOrbs) THEN
            iSS = 1
        ELSE
            iSS = 2
        end if

        nBi = nBasis / iSS
        if (t_non_hermitian_2_body) then
            iPairs = nbi**2
        else
            iPairs = (nBi * (nBi + 1)) / 2
        end if
        iSize = (int(iPairs, int64) * int(iPairs + 1, int64)) / 2
#ifdef CMPLX_
        !Since we now only have 4-fold symmetry, rather than 8-fold.
        iSize = iSize * 2
#endif
    END SUBROUTINE GetUMatSize

    SUBROUTINE SETUPUMATCACHE(NSTATE, TSMALL)
        ! nState: # states.
        ! TSMALL is used if we create a pre-freezing cache to hold just the <ij|kj> integrals.
        INTEGER NSTATE
        real(dp) Memory
        LOGICAL TSMALL
        INTEGER ierr
        character(len=*), parameter :: thisroutine = 'SETUPUMATCACHE'
        NTYPES = HElement_t_size
        NHITS = 0
        NMISSES = 0
        iCacheOvCount = 0
        NSTATES = NSTATE
        IF (NSLOTSINIT <= 0) THEN
            NSLOTS = 0
            write(stdout, *) "Not using UMATCACHE."
        ELSE
            NPAIRS = NSTATES * (NSTATES + 1) / 2
            write(stdout, *) "NPairs: ", NSTATES, NPAIRS
            IF (TSMALL) THEN
                NSLOTS = NSTATES
                tSmallUMat = .TRUE.
                write(stdout, *) "Using small pre-freezing UMat Cache."
            ELSE
                IF (nMemInit /= 0) THEN
                    write(stdout, *) "Allocating ", nMemInit, "Mb for UMatCache+Labels."
                    nSlotsInit = nint((nMemInit * 1048576 / 8) / (nPairs * (nTypes * HElement_t_size + 1.0_dp / irat)))
                end if
                NSLOTS = MIN(NPAIRS, NSLOTSINIT)
                tSmallUMat = .FALSE.
            end if
            UMATCACHEFLAG = 0
            write(stdout, "(A,I3,2I7,I10)") "UMAT NTYPES,NSLOTS,NPAIRS,TOT", NTYPES, NSLOTS, NPAIRS, NSLOTS * NPAIRS * NTYPES
            TUMAT2D = .FALSE.
            ! Each cache element stores <ij|ab> and <ib|aj>.  If real orbitals
            ! then these are identical and we can use this to halve the storage
            ! space (setting nTypes=1).  If not, we must store both explicitly
            ! (nTypes=2).
            allocate(UMatCacheData(0:nTypes - 1, nSlots, nPairs), STAT=ierr)
            call LogMemAlloc('UMatCache', nTypes * nSlots * nPairs, 8 * HElement_t_size, thisroutine, tagUMatCacheData)
            allocate(UMatLabels(nSlots, nPairs), STAT=ierr)
            CALL LogMemAlloc('UMATLABELS', nSlots * nPairs, 4, thisroutine, tagUMatLabels)
            Memory = (REAL(nTypes * nSlots, dp) * nPairs * 8.0_dp * HElement_t_size + nSlots * nPairs * 4.0_dp) * 9.536743316e-7_dp
            write(stdout, "(A,G20.10,A)") "Total memory allocated for storage of integrals in cache is: ", Memory, "Mb/Processor"

            UMatCacheData = (0.0_dp)
            UMATLABELS(1:nSlots, 1:nPairs) = 0
            if (.not. tSmallUMat .and. tReadInCache) then
                write(stdout, *) 'reading in cache'
                call ReadInUMatCache
            end if
        end if
    END SUBROUTINE SetupUMatCache

    SUBROUTINE SETUPUMAT2D(G1, HarInt)
        ! Set up UMat2D for storing the <ij|u|ij> and <ij|u|ji> integrals,
        ! and pre-calculate the common integrals (<ij|u|ij>, <ij|u|ji>,
        ! <i|v_har|j>) for CPMD calculations.
        ! In:
        !    G1: symmetry and momentum information on the basis functions.
        ! Out:
        !    HarInt(i,j)=<i|v_har|j>, where v_har is the Hartree potential.
        TYPE(BasisFN) G1(*)
        INTEGER ierr
        complex(dp) HarInt(nStates, nStates)
        character(len=*), parameter :: thisroutine = 'SETUPUMAT2D'
        IF ((NSLOTSINIT < 0)) THEN
            TUMAT2D = .FALSE.
            write(stdout, *) "Not using UMAT2D."
        ELSE
            TUMAT2D = .TRUE.
            allocate(UMat2D(nStates, nStates), STAT=ierr)
            UMat2D = 0.0_dp
            call LogMemAlloc('UMat2D', nStates**2, 8 * HElement_t_size, thisroutine, tagUMat2D, ierr)
            CALL CPMDANTISYMINTEL(G1, UMAT2D, HarInt, NSTATES)
        end if
    END SUBROUTINE SetupUMat2D

    SUBROUTINE SETUPUMAT2D_DF()
        ! Set up UMat2D for storing the <ij|u|ij> and <ij|u|ji> integrals for
        ! density fitting calculations.
        INTEGER ierr
        character(len=*), parameter :: thisroutine = 'SETUPUMAT2D_DF'
        IF (NSLOTSINIT < 0) THEN
            TUMAT2D = .FALSE.
            write(stdout, *) "Not using UMAT2D."
        ELSE
            TUMAT2D = .TRUE.
            allocate(UMat2D(nStates, nStates), STAT=ierr)
            UMat2D = 0.0_dp
!            write(stdout,*) "nStates for UMat2D: ",nStates
            call LogMemAlloc('UMat2D', nStates**2, 8 * HElement_t_size, thisroutine, tagUMat2D, ierr)
            IF (.not. tRIIntegrals) THEN
                CALL ReadDalton2EIntegrals(nStates, UMat2D, tUMat2D)
            end if
        end if
    END SUBROUTINE SetupUMat2D_DF

    SUBROUTINE SETUMATTRANS(TRANS)
        ! In:
        !    Trans: Translation list of orbitrals from one ordering to a new one.
        ! Currently only called in cpmdinit to re-order states by the
        ! one-particle energies (option is rarely used).
        ! Copy to UMatCache's translation table.
        INTEGER TRANS(NSTATES), ierr
        character(*), parameter :: thisroutine = 'SetupUMatTrans'
        allocate(TransTable(nStates), STAT=ierr)
        call LogMemAlloc('TransTable', nStates, 4, thisroutine, tagTransTable, ierr)
        CALL NECI_ICOPY(NSTATES, TRANS, 1, TransTable, 1)
        TTRANSGTID = .TRUE.
    END SUBROUTINE SetUMatTrans

    SUBROUTINE SetupUMatTransTable(OldNew, nOld, nNew)
        ! Set up translational table for freezing.
        ! In:
        !    nOld: # of old states.
        !    nNew: # of new states.
        !    OldNew: convert index in the old (pre-freezing) indexing scheme to
        !            the new (post-freezing) indexing scheme.
        INTEGER nNew, nOld, I
        INTEGER OldNew(*), ierr
        LOGICAL tDiff
        character(*), parameter :: thisroutine = 'SetupUMatTransTable'
        allocate(TransTable(nNew / 2), STAT=ierr)
        call LogMemAlloc('TransTable', nNew / 2, 4, thisroutine, tagTransTable, ierr)
        allocate(InvTransTable(nOld / 2), STAT=ierr)
        call LogMemAlloc('InvTransTable', nOld / 2, 4, thisroutine, tagInvTransTable, ierr)
        InvTransTable(1:nOld / 2) = 0
        tDiff = .FALSE.
        DO I = 2, nOld, 2
            IF (OldNew(I) /= 0) THEN
                TransTable(OldNew(I) / 2) = I / 2
                InvTransTable(I / 2) = OldNew(I) / 2
                IF (OldNew(I) / 2 /= I / 2) tDiff = .TRUE.
            end if
        end do
        IF (tDiff) THEN
            write(stdout, *) "New->Old State Translation Table"
            DO I = 1, nNew / 2
                write(stdout, *) I, TransTable(I)
            end do
        end if
        TTRANSFINDX = .TRUE.
    END SUBROUTINE SetupUMatTransTable

    SUBROUTINE DESTROYUMATCACHE
        character(len=*), parameter :: thisroutine = 'DESTROYUMATCACHE'
        CALL WriteUMatCacheStats()
        IF (ASSOCIated(UMatCacheData)) THEN
            write(stdout, *) "Destroying UMatCache"
            CALL LogMemDealloc(thisroutine, tagUMatCacheData)
            Deallocate(UMatCacheData)
            CALL LogMemDealloc(thisroutine, tagUMATLABELS)
            Deallocate(UMatLabels)
        end if
        IF (ASSOCIated(UMat2D)) THEN
            CALL LogMemDealloc(thisroutine, tagUMat2D)
            Deallocate(UMat2D)
        end if
        IF (ASSOCIated(TransTable)) THEN
            CALL LogMemDealloc(thisroutine, tagTransTable)
            Deallocate(TransTable)
        end if
        IF (ASSOCIated(InvTRANSTABLE)) THEN
            CALL LogMemDealloc(thisroutine, tagInvTransTable)
            Deallocate(InvTRANSTABLE)
        end if
    END SUBROUTINE DESTROYUMATCACHE

    SUBROUTINE WriteUMatCacheStats
        IF (ASSOCIated(UMatCacheData)) THEN
            write(stdout, *) "UMAT Cache Statistics"
            write(stdout, *) NHITS, " hits"
            write(stdout, *) NMISSES, " misses"
            write(stdout, *) iCacheOvCount, " overwrites"
            if (NHITS + NMISSES > 0) then
                write(stdout, "(F6.2,A)")(NHITS / (NHITS + NMISSES + 0.0_dp)) * 100, "% success"
            end if
        end if
    END SUBROUTINE WriteUMatCacheStats

    SUBROUTINE SETUMATCACHEFLAG(NEWFLAG)
        ! Change caching mode of UMatCache,
        ! In:
        !    NewFlag [0,1]: new value for UMatCacheFlag.
        !  flag=1: Storing just the <ik|u|jk> integrals in order they arrive
        !  in (and only have room to do so).
        !  flag=0: Distribute integrals throughout the cache in the scheme
        !  described at the top.
        INTEGER NEWFLAG
        SELECT CASE (UMATCACHEFLAG)
        CASE (1)
!  We were in direct cache mode where values were distributed correctly throughout the cache.
            IF (NEWFLAG == 0 .AND. .NOT. tSmallUMat) THEN
!  We need to fill the cache properly with values from the small cache.
                CALL FILLUPCACHE()
            end if
        END SELECT
        UMATCACHEFLAG = NEWFLAG
        SELECT CASE (NEWFLAG)
        CASE (1)
            IF (NSLOTS == NPAIRS) THEN ! we're storing every element, so we don't need to deal with different cacheing
                UMATCACHEFLAG = 0
            ELSE
                UMATLABELS(1:NSLOTS, 1:NPAIRS) = 0
!Turn on the direct caching, and clear the cache.
            end if
        END SELECT
        RETURN
    END SUBROUTINE SETUMATCACHEFLAG

    SUBROUTINE FillUpCache()
        ! Disperse the (pre-calculated) <ik|u|jk> integrals throughout the cache.
        ! The cache consists of an unordered set (in the standard UMatCache
        ! sense) of labels and elements.
        ! We must order this, and then distribute the elements throughout each set of SLOTS.
        INTEGER I, J, K, N, nK
        DO I = 1, nPairs
! Find the last value in the cache
            CALL BinarySearch(nPairs + 1, UMatLabels(1:nSlots, I), 1, nSlots, N, J, K)
            N = J - 1
! N is now the last element and thus number of elements.
! Sort according to label
            call sort(UMatLabels(1:N, i), UMatCacheData(:, 1:N, i))
            K = nSlots
! Now copy element among the whole array for this, from the end.
! (Multiple copies of the same integral in an unfilled cache make adding
! elements ! substantially faster.)
            DO J = N, 1, -1
                nK = (nSlots * (J - 1)) / N + 1
                UMatLabels(nK:K, I) = UMatLabels(J, I)
                DO K = K, nK, -1
                    UMatCacheData(:, K, I) = UMatCacheData(:, J, I)
                end do
                K = nK
            end do
        end do
    END SUBROUTINE FillUpCache

    SUBROUTINE BINARYSEARCH(VAL, TAB, A, B, LOC, LOC1, LOC2)
!   A binary search to find VAL in TAB.  TAB is sorted, but can have
!   multiple entries being the same.  If the search terminated unsuccessfully,
!   the entry indicated is one after half-way through the set of entries which
!   would be immediately prior to it.  From here until the label changes
!   should be filled with VAL if it is to be entered into the table.
!   A and B are the limits of the table.
!   If the search is successful, the location of VAL in TAB is returned in LOC
!   (and LOC1,LOC2).
!   If the search fails, then VAL should fit between LOC1 and LOC2 in TAB.
        INTEGER VAL, A, B, LOC, LOC1, LOC2
        INTEGER TAB(A:B)
        INTEGER I, J, IFIRST, N, ILAST
!         DO I=A,B
!            write(stdout,*) I,TAB(I)
!         end do
        I = A
        J = B
        IFIRST = I
        ILAST = J
        DO WHILE (J - I >= 1)
            N = (I + J) / 2
!            write(stdout,"(A,5I3)") "TN",I,J,N,TAB(N),VAL
            IF (TAB(N) < VAL .AND. TAB(N) /= 0 .AND. I /= N) THEN
                IF (TAB(N) /= TAB(IFIRST)) IFIRST = N
!   reset the lower limit
                I = N
            else if (TAB(N) > VAL .OR. TAB(N) == 0) THEN
                IF (TAB(N) /= TAB(ILAST)) ILAST = N
!   reset the upper limit
                J = N
            else if (TAB(N) == VAL) THEN
!   bingo, we've got it!
                LOC = N
!         DO I=A,B
!            write(stdout,*) I,TAB(I),I.EQ.LOC
!         end do
                LOC1 = N
                LOC2 = N
                RETURN
            ELSE
!   we've reached a situation where I and J's entries have the same value, and it's
!   not the one we want.  Leave the loop.
                I = J
            end if
        end do
!Finally, check the last element of the array, as it can still be there.
        IF (TAB(B) == VAL) THEN
            LOC = B
            LOC1 = B
            LOC2 = B
            RETURN
        end if
!   We've failed.  However, the new value should sit between I and J.
!   Split whichever of the prior or after slots which has the most duplicates
!         write(stdout,*) "FAIL:",IFIRST,I,J,ILAST
        LOC1 = IFIRST + 1
        LOC2 = ILAST - 1
        IF (TAB(IFIRST) == TAB(ILAST)) THEN
            LOC = (IFIRST + ILAST) / 2
            LOC1 = IFIRST
            LOC2 = ILAST
        else if (I - IFIRST >= ILAST - J) THEN
            LOC = (IFIRST + I) / 2
        ELSE
            LOC = (ILAST + J) / 2
        end if
!         DO I=A,B
!            write(stdout,*) I,TAB(I),I.EQ.LOC
!         end do
    END SUBROUTINE BinarySearch

!   Get a unique index corresponding to pair (I,J), and return in RET.
!  New Scheme 1/2/07
!   e.g. 11 12 13 14 15 corresponds to    1  2  4  7 11
!           22 23 24 25                      3  5  8 12
!              33 34 35                         6  9 13
!                 44 45                           10 14
!                    55                              15
    SUBROUTINE GETCACHEINDEX(I, J, RET)
        ! In:
        !    I,J (I<=J): state indices
        ! Out:
        !    Cache indexing scheme.
        INTEGER I, J, RET
        RET = J * (J - 1) / 2 + I
    END SUBROUTINE GetCacheIndex

! Example: calculating int(sqrt(2*ind)) from 2*ind.
!    2ind                    sqrt(2ind)
!     2  4  8 14 22... 92  -> 1  2  2  3  4 ... 9
!        6 10 16 24    94        2  3  4  4     9
!          12 18 26    96           3  4  5     9
!             20 28    98              4  5     9
!                30   100                 5    10
!                     102
!                     104
!                     106
!                     108
!                     110                      10
! But indexing scheme:
!   2ind                      J
!     2  4  8 14 22... 92  -> 1  2  3  4  5 ...10
!        6 10 16 24    94        2  3  4  5    10
!          12 18 26    96           3  4  5    10
!             20 28    98              4  5    10
!                30   100                 5    10
! Hence need to +1 to J in some cases.
    SUBROUTINE GETCACHEINDEXSTATES(IND, I, J)
        ! In:
        !   Ind: Cache index.
        ! Out:
        !   I,J (I<=J): states corresponding to cache index.
        ! Reverse of GetCacheIndex.
        INTEGER I, J, IND
        J = int(SQRT(2.0d0 * IND))
        IF (J * (J + 1) / 2 < IND) J = J + 1
        I = IND - J * (J - 1) / 2
    END SUBROUTINE GetCacheIndexStates

    SUBROUTINE FreezeUMatCache(OrbTrans, nOld, nNew)
        ! We're in the middle of freezing some orbitals.
        ! OrbTrans(i) will give us the new position of the old orbital i.
        INTEGER nOld, nNew, OrbTrans(nOld)
        INTEGER onSlots, onPairs
        if (nNew / 2 /= nStates .OR. tSmallUMat) THEN
            write(stdout, *) "Reordering UMatCache for freezing"
            onSlots = nSlots
            onPairs = nPairs
            CALL FreezeUMatCacheInt(OrbTrans, nOld, nNew, onSlots, onPairs)
        else
            write(stdout, *) "UMatCache size not changing.  Not reordering."
        end if
    END SUBROUTINE FreezeUMatCache

    SUBROUTINE FreezeUMAT2D(OldBasis, NewBasis, OrbTrans, iSS)
        INTEGER NewBasis, OldBasis, iSS, ierr, OrbTrans(OldBasis), i, j
        HElement_t(dp), POINTER :: NUMat2D(:, :)
        integer(TagIntType) :: tagNUMat2D = 0
        character(len=*), parameter :: thisroutine = 'FreezeUMat2D'

        allocate(NUMat2D(NewBasis / iSS, NewBasis / iSS), STAT=ierr)
        call LogMemAlloc('UMat2D',(NewBasis / iSS)**2, 8 * HElement_t_size, thisroutine, tagNUMat2D, ierr)
        NUMat2D(:, :) = (0.0_dp)
        DO i = 1, OldBasis / 2
            IF (OrbTrans(i * 2) /= 0) THEN
                DO j = 1, OldBasis / 2
                    IF (OrbTrans(j * 2) /= 0) THEN
                        NUMat2D(OrbTrans(i * 2) / 2, OrbTrans(j * 2) / 2) = UMat2D(i, j)
                    end if
                end do
            end if
        end do
        call LogMemDealloc(thisroutine, tagUMat2D)
        Deallocate(UMat2D)
        UMat2D => NUMat2D
        NULLIFY(NUMat2D)
        tagUMat2D = tagNUMat2D
        RETURN
    END SUBROUTINE FreezeUMAT2D

    SUBROUTINE FreezeUMatCacheInt(OrbTrans, nOld, nNew, onSlots, onPairs)
        INTEGER nOld, nNew, OrbTrans(nOld)
        HElement_t(dp), Pointer :: NUMat2D(:, :) !(nNew/2,nNew/2)
        integer(TagIntType) :: tagNUMat2D = 0
        HElement_t(dp) El(0:nTypes - 1)
        INTEGER i, j, k, l, m, n
        INTEGER ni, nj, nk, nl, nm, nn, A, B, iType
        HElement_t(dp), Pointer :: OUMatCacheData(:, :, :) !(0:nTypes-1,onSlots,onPairs)
        INTEGER, Pointer :: OUMatLabels(:, :) !(onSlots,onPairs)

        INTEGER onSlots, onPairs, ierr
        LOGICAL toSmallUMat, tlog, toUMat2D, tmpl
        character(len=*), parameter :: thisroutine = 'FreezeUMatCacheInt'

        toUMat2D = tUMat2D
        IF (tUMat2D) then
            allocate(NUMat2D(nNew / 2, nNew / 2), STAT=ierr)
            call LogMemAlloc('UMat2D',(nNew / 2)**2, 8 * HElement_t_size, thisroutine, tagNUMat2D, ierr)
! /2 because UMat2D works in states, not in orbitals
            DO i = 1, nOld / 2
                IF (OrbTrans(i * 2) /= 0) THEN
                    DO j = 1, nOld / 2
                        IF (OrbTrans(j * 2) /= 0) THEN
                            NUMat2D(OrbTrans(i * 2) / 2, OrbTrans(j * 2) / 2) = UMat2D(i, j)
                        end if
                    end do
                end if
            end do
            CALL LogMemDealloc(thisroutine, tagUMat2D)
            Deallocate(UMat2D)
            UMat2D => NUMat2D
            nullify(NUMat2D)
            tagUMat2D = tagNUMat2D
        end if
! Now go through the other cache.
! First save the memory used for it.
!         onSlots=nSlots
!         onPairs=nPairs
        OUMatCacheData => UMatCacheData
        tagOUMatCacheData = tagUMatCacheData
        OUMatLabels => UMatLabels
        tagOUMatLabels = tagUMatLabels
        toSmallUMat = tSmallUMat
        Nullify(UMatCacheData)
        Nullify(UMatLabels)
!Now reinitialize the cache.
        CALL SetupUMatCache(nNew / 2, .FALSE.)
        TUMAT2D = toUMat2D
        CALL SetUMatcacheFlag(1)
        DO i = 1, nOld / 2
            IF (OrbTrans(i * 2) /= 0) THEN
                DO k = i, nOld / 2
                    IF (OrbTrans(k * 2) /= 0) THEN
                        CALL GetCacheIndex(i, k, m)
                        DO n = 1, onSlots

                            tmpl = .true.
                            if (n /= 1) then
                                if (OUMatLabels(n, m) /= OUMatLabels(n - 1, m)) tmpl = .false.
                            end if
                            if ((onSlots == onPairs .or. toSmallUMat) .or. &
                                (onSlots /= onPairs .and. tmpl)) then
                                IF (OUMatLabels(n, m) /= 0) THEN
                                    ni = OrbTrans(i * 2) / 2
                                    nk = OrbTrans(k * 2) / 2
!Now get the label of the slot and convert to orbitals
                                    IF (onSlots == onPairs) THEN
                                        CALL GetCacheIndexStates(n, j, l)
                                    else if (toSmallUMat) THEN
                                        j = n
                                        l = n
                                    ELSE
                                        CALL GetCacheIndexStates(OUMatLabels(n, m), j, l)
                                    end if
                                    nj = OrbTrans(j * 2) / 2
                                    nl = OrbTrans(l * 2) / 2
                                    IF (nj /= 0 .AND. nl /= 0) THEN
                                        ! JSS: Alex, please check!
                                        ! GetCachedUMatEl called for cache indices: El(O) is
                                        ! a dummy argument.
                                        Tlog = GetCachedUmatEl(ni, nj, nk, nl, El(0), nm, nn, A, B, iType)
                                        CALL CacheUMatEl(B, OUMatCacheData(0, n:n + nTypes, m:m + nTypes), nm, nn, iType)
                                    end if
                                end if
                            end if
                        end do
                    end if
                end do
            end if
        end do
        CALL LogMemDealloc(thisroutine, tagOUMatLabels)
        Deallocate(OUMatLabels)
        CALL LogMemDealloc(thisroutine, tagOUMatCacheData)
        Deallocate(OUMatCacheData)
        CALL SetUMatCacheFlag(0)
    END SUBROUTINE FreezeUMatCacheInt

    SUBROUTINE CacheFCIDUMP(I, J, K, L, Z, CacheInd, ZeroedInt, NonZeroInt)
        INTEGER :: I, J, K, L, CacheInd(nPairs)
        INTEGER(int64) :: ZeroedInt, NonZeroInt
        INTEGER :: A, B
        HElement_t(dp) :: Z

        IF (abs(Z) < UMatEps) THEN
!We have an epsilon cutoff for the size of the two-electron integrals - UMatEps
            ZeroedInt = ZeroedInt + 1
            RETURN
        ELSE
            NonZeroInt = NonZeroInt + 1
        end if

!Find unique indices within permutational symmetry.
        IF (K < I) THEN
            CALL SWAP(I, K)
        end if
        IF (L < J) THEN
            CALL SWAP(J, L)
        end if
        CALL GETCACHEINDEX(I, K, A)
        CALL GETCACHEINDEX(J, L, B)
        IF (A > B) THEN
            CALL SWAP(A, B)
            CALL SWAP(I, J)
            CALL SWAP(K, L)
        end if

!          write(stdout,*) "Final Phys ordering: ",I,J,K,L
!          write(stdout,*) "Pair indices: ",A,B

        IF (A > nPairs) THEN
            write(stdout, *) "Final Phys ordering: ", I, J, K, L
            write(stdout, *) "Pair indices: ", A, B
            write(stdout, *) "nPairs,nSlots: ", nPairs, nSlots
            CALL Stop_All("CacheFCIDUMP", "Error in caching")
        end if

!Store the integral in a contiguous fashion. A is the index for the i,k pair
        IF (UMATLABELS(CacheInd(A), A) /= 0) THEN
            IF ((abs(REAL(UMatCacheData(nTypes - 1, CacheInd(A), A), dp) - Z)) > 1.0e-7_dp) THEN
                write(stdout, *) i, j, k, l, z, UMatCacheData(nTypes - 1, CacheInd(A), A)
                CALL Stop_All("CacheFCIDUMP", "Same integral cached in same place with different value")
            end if

            CALL Stop_All("CacheFCIDUMP", "Overwriting UMATLABELS")
        end if
        UMATLABELS(CacheInd(A), A) = B
        IF (.not. near_zero(REAL(UMatCacheData(nTypes - 1, CacheInd(A), A), dp))) THEN
            CALL Stop_All("CacheFCIDUMP", "Overwriting when trying to fill cache.")
        end if
        UMatCacheData(nTypes - 1, CacheInd(A), A) = Z

        CacheInd(A) = CacheInd(A) + 1
        IF (CacheInd(A) > nSlots) THEN
            CALL Stop_All("CacheFCIDUMP", "Error in filling cache")
        end if

!After we have filled all of the cache, this will want to be sorted.
    END SUBROUTINE CacheFCIDUMP

    subroutine ReadInUMatCache()
        ! Read in cache file from CacheDump.
        integer i, j, k, l, iCache1, iCache2, A, B, readerr, iType, iunit
        HElement_t(dp) UMatEl(0:nTypes - 1), DummyUMatEl(0:nTypes - 1)
        logical tDummy, testfile
        inquire(file="CacheDump", exist=testfile)
        if (.not. testfile) then
            write(stdout, *) 'CacheDump does not exist.'
            return
        end if
        iunit = get_free_unit()
        open(iunit, file="CacheDump", status="old", iostat=readerr)
        if (readerr /= 0) then
            write(stdout, *) 'Error reading CacheDump.'
            return
        end if
        read(iunit, *) nStatesDump
        readerr = 0
        do while (readerr == 0)
            read(iunit, *, iostat=readerr) i, j, k, l, UMatEl
            DummyUMatEl = UMatEl
            if (TTRANSFINDX) then
                i = TransTable(i)
                j = TransTable(j)
                k = TransTable(k)
                l = TransTable(l)
            end if
            if (min(i, j, k, l) > 0 .and. max(i, j, k, l) <= nStates) then
                ! Need to get cache indices before we cache the integral:
                ! a dummy call to GetCachedUMatEl returns the needed indices and
                ! integral type information.
                tDummy = GetCachedUMatEl(i, j, k, l, DummyUmatEl(0), iCache1, iCache2, A, B, iType)
                call CacheUMatEl(B, UMatEl, iCache1, iCache2, iType)
            end if
        end do
        close(iunit)
        return
    end subroutine ReadInUMatCache

    subroutine DumpUMatCache()
        ! Print out the cache contents so they can be read back in for a future
        ! calculation.  Need to print out the full set of indices, as the number of
        ! states may change with the next calculation.
        ! Variables
        integer iPair, iSlot, i, j, k, l, iCache1, iCache2, A, B, iType
        HElement_t(dp) UMatEl
        type(Symmetry) Sym
        integer iunit
        iunit = get_free_unit()
        open(iunit, file="CacheDump", status="unknown")
!      do i=1,nPairs !Run through ik pairs
!          do j=1,nSlots !Run through all pairs (unordered in the list)
!              write(iunit,*) i,j,UMatLabels(j,i),UMatCacheData(:,j,i)  !ik label, slot value, jl label, integral
!          end do
!      end do
!      write(iunit,*) "*****"
        write(iunit, *) nStates
        do iPair = 1, nPairs
            do iSlot = iPair, nSlots
                call GetCacheIndexStates(iPair, i, k)
                call GetCacheIndexStates(iSlot, j, l)
                Sym = TotSymRep()
                ! All integrals stored in the cache are non-zero by symmetry.
                if (LSymSym(Sym)) then
                    if (.not. GetCachedUMatEl(i, j, k, l, UMatEl, iCache1, iCache2, A, B, iType)) then
                        if (TTRANSFINDX) then
                            i = InvTransTable(i)
                            j = InvTransTable(j)
                            k = InvTransTable(k)
                            l = InvTransTable(l)
                        end if
                        ! Print out UmatCacheData as UMatEl holds just a single
                        ! integral.
                        write(iunit, *) i, j, k, l, UMatCacheData(:, ICACHE2, ICACHE1)!,A,B
                    end if
                end if
            end do
        end do
        close(iunit, status="keep")
        return
    end subroutine DumpUMatCache

    logical function HasKPoints()
        IF (NKPS > 1) THEN
            HasKPoints = .TRUE.
        ELSE
            HasKPoints = .FALSE.
        end if
    end function HasKPoints

    elemental function GTID(gInd) result(id)

        ! Convert spin orbital index to spacial orbital index if required
        ! (e.g. for restricted calculation)
        !
        ! In:  gInd - Spin orbital index
        ! Ret: id   - Overall index

        integer, intent(in) :: gInd
        integer :: id

        if (tStoreSpinOrbs) then
            ! Storing as spin-orbitals (UHF/default ROHF)
            id = gInd
        else
            ! Storing as spatial orbitals (RHF or explicit input option ROHF)
            id = get_spat(gInd)
        end if
        if (tTransGTID) id = TransTable(id)
    end function

    elemental function spatial(spin_orb) result(spat_orb)

        ! Convert a spin orbital label into a spatial one. This is more
        ! appropriate than the above function, gtid, when one always wants the
        ! spatial label, regardless of the way integrals are being stored.

        integer, intent(in) :: spin_orb
        integer :: spat_orb

        spat_orb = (spin_orb - 1) / 2 + 1

    end function spatial

    LOGICAL FUNCTION GETCACHEDUMATEL(IDI, IDJ, IDK, IDL, UMATEL, ICACHE, ICACHEI, A, B, ITYPE)
        ! In:
        !    IDI,IDJ,IDK,IDL: orbitals indices of the integral(s). These are
        !    rearranged, as disucssed below.
        ! Out:
        !    UMatEl: <ij|u|kl> integral (if found in the cache).
        !    A,B: Cache indices of (I,K) and (J,L) pairs (see GetCacheIndex).
        !    ICache,ICacheI: location of integral within the cache (or where it
        !                    should be stored if not found).
        !    iType: gives information (bit-wise) on where the requested
        !           integral lies within the cache slot.  Useful if the
        !           integral has to be computed (using the reordered indices).

! Lookup in the cache to see if there's a stored element.  If not, return TRUE,
! together with the information on how to compute the integral (re-ordered
! indices, cache indices and a "type" index).

! If the integral exists in the cache, return false and return the stored element in UMatEl.

! NOTE: This will rearrange IDI,IDJ,IDK,IDL into the correct order
! (i,k)<=(j,l) and i<=k, j<=l.  ICACHE corresponds to the pair (i,j), and
! ICACHEI is the index in that cache where the cache should be located.
! Note: (i,k)>(j,l) := (k>l) || ((k==l)&&(i>j))

!         use SystemData, only : nBasis,G1
        INTEGER IDI, IDJ, IDK, IDL, ICACHE, ICACHEI
        INTEGER ICACHEI1, ICACHEI2
        HElement_t(dp) UMATEL
        INTEGER A, B, ITYPE, ISTAR, ISWAP
!         LOGICAL tDebug
!         IF(IDI.eq.14.and.IDJ.eq.17.and.IDK.eq.23.and.IDL.eq.6) THEN
!             write(stdout,*) "Setting tDebug!"
!             tDebug=.true.
!         ELSE
!             tDebug=.false.
!         end if
!         write(stdout,"(A,4I5)") "GCUI",IDI,IDJ,IDK,IDL
        IF (NSLOTS == 0) THEN
!We don't have a cache so signal failure.
            GETCACHEDUMATEL = .TRUE.
            ICACHE = 0
            ITYPE = 0
            RETURN
        end if
!   First ensure the indices are in the correct order
        IF (tSmallUMat) THEN
!tSmallUMat is set if we have nStates slots per pair for storing the <ik|jk> integrals.
            ITYPE = 0
            IF (IDI == IDK) THEN
                B = IDI
                IF (IDL < IDJ) THEN
                    CALL SWAP(IDJ, IDL)
                    ITYPE = 2
                end if
                CALL GETCACHEINDEX(IDJ, IDL, A)
            else if (IDJ == IDL) THEN
                B = IDJ
                IF (IDK < IDI) THEN
                    CALL SWAP(IDI, IDK)
                    ITYPE = 2
                end if
                CALL GETCACHEINDEX(IDI, IDK, A)
            ELSE !Can't find a pair the same
!We don't have a cache for this type of element so signal failure.
                GETCACHEDUMATEL = .TRUE.
                ICACHE = 0
                ITYPE = 0
                RETURN
            end if
        ELSE
!Otherwise Normal caching
            ITYPE = 0
            ISTAR = 0
            ISWAP = 0
            IF (IDK < IDI) THEN
                CALL SWAP(IDI, IDK)
                ISTAR = IOR(ISTAR, 1)
            end if
            IF (IDL < IDJ) THEN
                CALL SWAP(IDJ, IDL)
                ISTAR = IOR(ISTAR, 2)
            end if
            CALL GETCACHEINDEX(IDI, IDK, A)
            CALL GETCACHEINDEX(IDJ, IDL, B)
            IF (A > B) THEN
                CALL SWAP(A, B)
                CALL SWAP(IDI, IDJ)
                CALL SWAP(IDK, IDL)
                ISWAP = 1
            end if
            IF (HElement_t_size == 1) THEN
!  Eight integrals from ijkl are the same.
                ITYPE = 0
            ELSE
!  Complex orbitals and integrals, so we need to consider different types
!  Using notation abcd rather than ijkl.  6/2/07 and 19/2/06
!
!  <> mean swap pairs <ab|cd> -> <ba|dc>
!  *.  means complex conjugate of first codensity i.e. <ab|cd> -> <cb|ad>
!  .* for second and ** for both.
!
!  abcd   -> badc <>
!  |  |
!  | \|/       |-> cdab ** -> dcba **<>
!  | cbad *.  -|
!  |           |-> bcda *.<>
! \|/
!  adcb .* -> dabc .*<>

!Now consider what must occur to the other integral in the slot to recover pair
!(abcd,cbad).  0 indicates 1st in slot, 1 means 2nd.  * indicated conjg.
!
!  ..    abcd  cbad  0  1
!  *.    cbad  abcd  1  0
!  .*    adcb  cbad  1* 0*
!  **    cdab  adcb  0* 1*
!    <>  badc  dabc  0  1*
!  *.<>  bcda  dcba  1* 0
!  .*<>  dabc  badc  1  0*
!  **<>  dcba  bcda  0* 1

! Of the type, bit zero indicates which of the two integrals in a slot to use.
!Bit 1 is set if the integral should be complex conjugated.
!   Bit 2 is set if the other integral in the slot should be complex conjugated
!if we are to have the structure (<ij|kl>,<kj|il>) in the slot.
!      This is only used by CacheUMatEl when adding a slot to the cache.
!
!  <ij|u|kl> and <kj|u|il> (which are distinct when complex orbitals are used).
!  TYPE 0          TYPE 1
!
!
                IF (ISTAR == 0) THEN
                    IF (ISWAP == 0) THEN
                        ITYPE = 0  !0  1
                    ELSE
                        ITYPE = 4  !0  1*
                    end if
                else if (ISTAR == 1) THEN
!  If we star the first pair, that corresponds to the plain TYPE 1.  If we swap too, then we complex conj.
                    IF (ISWAP == 0) THEN
                        ITYPE = 1  !1  0
                    ELSE
                        ITYPE = 3  !1* 0
                    end if
                else if (ISTAR == 2) THEN
!  If we star the second pair, that corresponds to TYPE 1.
!  If there's no swap, it's complex conjugated, otherwise it's not.
                    IF (ISWAP == 0) THEN
                        ITYPE = 7  !1* 0*
                    ELSE
                        ITYPE = 1  !1  0*
                    end if
                else if (ISTAR == 3) THEN
! We've starred both pairs
!  We complex conjg setting bit 1 but using type 0
                    IF (ISWAP == 0) THEN
                        ITYPE = 6  !0* 1*
                    ELSE
                        ITYPE = 2  !0* 1
                    end if
                end if
            end if
        end if
!         IF(tDebug) write(stdout,"(A,8I5)") "GCUE",IDI,IDJ,IDK,IDL,A,B,iType,UMatCacheFlag
        ICACHE = A
        IF (NSLOTS == NPAIRS .OR. tSmallUMat) THEN
!   we've a small enough system to store everything.
            ICACHEI = B
        ELSE
            IF (UMATCACHEFLAG == 1) THEN
!  UMATCACHEFLAG=1 means we are storing a sequence of cache elements,
!  in a blank cache
!  We store them linearly in the cache, and distribute them around later

!Find the last value in the cache
                CALL BINARYSEARCH(NPAIRS + 1, UMATLABELS(1:NSLOTS, A), 1, NSLOTS, ICACHEI, ICACHEI1, ICACHEI2)
                ICACHEI = ICACHEI1
                ICACHEI2 = ICACHEI1
                IF (UMatLabels(iCacheI, A) /= 0) iCacheOvCount = iCacheOvCount + 1

!write(stdout,*) "Cache Overwrite", A,B
!                  write(stdout,*) IDI,IDJ,IDK,IDL
!                  write(stdout,*) A,B,NSLOTS,NPAIRS
!                  write(stdout,*) ICACHEI1,ICACHEI2
!                  write(stdout,*) ICACHEI
            ELSE
!                IF(tDebug) THEN
!                     CALL DumpUMatCache(nBasis)
!                     write(8,*) B,NSLOTS
!                     write(8,*) UMATLABELS(1:NSLOTS,A)
!                 end if
                CALL BINARYSEARCH(B, UMATLABELS(1:NSLOTS, A), 1, NSLOTS, ICACHEI, ICACHEI1, ICACHEI2)
!                IF(tDebug) write(8,*) "***",UMATLABELS(ICACHEI,A),ICACHEI,ICACHEI1,ICACHEI2
            end if
        end if
        IF (UMATLABELS(ICACHEI, ICACHE) == B) THEN
            !write(stdout,*) "C",IDI,IDJ,IDK,IDL,ITYPE,UMatCacheData(0:nTypes-1,ICACHEI,ICACHE)
            UMATEL = UMatCacheData(IAND(ITYPE, 1), ICACHEI, ICACHE)
#ifdef CMPLX_
            IF (BTEST(ITYPE, 1)) UMATEL = CONJG(UMATEL)  ! Bit 1 tells us whether we need to complex conjg the integral
#endif
!   signal success
            GETCACHEDUMATEL = .FALSE.
        ELSE
!   signal failure
            GETCACHEDUMATEL = .TRUE.
!            write(stdout8,*) A,B,ICACHEI1,ICACHEI2,ICACHEI
        end if
        RETURN
    END FUNCTION GETCACHEDUMATEL

    !------------------------------------------------------------------------------------------!

    elemental function numBasisIndices(nBasis) result(nBI)
        integer, intent(in) :: nBasis
        integer :: nBI

        if (tStoreSpinOrbs) then
            nBI = nBasis
        else
            nBI = nBasis.div.2
        end if

    end function numBasisIndices

    !------------------------------------------------------------------------------------------!

    subroutine SetupUMat2d_dense(nBasis)
        integer, intent(in) :: nBasis
        integer :: nBI, i, j
        integer :: ierr
        character(*), parameter :: t_r = 'SetupUMat2d_dense'

        nBI = numBasisIndices(nBasis)
        ! allocate the storage
        if (.not. associated(UMat2D)) then
            allocate(UMat2D(nBI, nBI), stat=ierr)
            call LogMemAlloc('UMat2D', nBI**2, 8 * HElement_t_size, t_r, tagUMat2D, ierr)
        end if

        ! and fill in the array
        do i = 1, nBI
            do j = 1, nBI
                if (i == j) then
                    UMat2d(i, i) = get_umat_el(i, i, i, i)
                else
                    ! similarly the integrals <ij|ij> in UMat2D
                    UMat2D(j, i) = get_umat_el(i, j, i, j)
                    UMat2D(i, j) = get_umat_el(i, j, j, i)
                end if
            end do
        end do
    end subroutine SetupUMat2d_dense

    !------------------------------------------------------------------------------------------!
    ! Empty umat for tests
    !------------------------------------------------------------------------------------------!

    function nullUMat(i, j, k, l) result(hel)
        use constants
        integer, intent(in) :: i, j, k, l
        HElement_t(dp) :: hel

        ! This functions shows the behaviour of an empty UMat
        unused_var(i)
        unused_var(j)
        unused_var(k)
        unused_var(l)
        hel = 0.0_dp
    end function nullUMat

! Set an element in the cache.  All the work has been done for us before as the
! element we have to set is in (ICACHEI,ICACHE) iType tells us whether we need
! to swap/conjugate the nTypes integrals within the slot We still need to fill
! out the space before or after  us if we've been put in the middle of a block
! of duplicates.
    SUBROUTINE CACHEUMATEL(B, UMATEL, ICACHE, ICACHEI, iType)
        ! In:
        !    A,B: cache indices of the element.
        !    UMatEl: element being stored.  For calculations involving real
        !            orbitals, this is a array of size 1 containing the
        !            <ij|u|kl> integral (nTypes=1).  For calculations involving
        !            complex orbtials, this is an array of size 2 containing
        !            the <ij|u|kl> and <il|u|jk> integrals (nTypes=2).
        !    ICache: Segment index of the cache for storing integrals involving
        !            index A (often equal to A).
        !    ICacheI: Slot within ICache segment for storing UMatEl involving
        !             B.
        !    iType: See notes below.
        use constants, only: dp
        INTEGER B, ICACHE, ICACHEI
        HElement_t(dp) UMATEL(0:NTYPES - 1), TMP(0:NTYPES - 1)
        INTEGER OLAB, IC1, ITOTAL
        INTEGER iType
        INTEGER iIntPos
        SAVE ITOTAL
        DATA ITOTAL/0/
        if (nSlots == 0) return
!         write(stdout,*) "CU",A,B,UMATEL,iType
!         write(stdout,*) A,ICache,B,ICacheI
        if (nTypes > 1) then
! A number of different cases to deal with depending on the order the integral came in (see GetCachedUMatEl for details)
!  First get which pos in the slot will be the new first pos
            iIntPos = iand(iType, 1)
!  If bit 1 is set we must conjg the (to-be-)first integral
#ifdef CMPLX_
            if (btest(iType, 1)) then
                Tmp(0) = conjg(UMatEl(iIntPos))
            else
                Tmp(0) = UMatEl(iIntPos)
            end if
#else
            Tmp(0) = UMatEl(iIntPos)
#endif
!  If bit 2 is set we must conjg the (to-be-)second integral
#ifdef CMPLX_
            if (btest(iType, 2)) then
                Tmp(1) = conjg(UMatEl(1 - iIntPos))
            else
                Tmp(1) = UMatEl(1 - iIntPos)
            end if
#else
            Tmp(1) = UMatEl(1 - iIntPos)
#endif
            UMatEl = Tmp
        end if
!         write(stdout,*) "CU",A,B,UMATEL,iType
!         write(stdout9,*) NSLOTS,A,B,UMATEL,ICACHE,ICACHEI
        IF (NSLOTS == NPAIRS .OR. UMATCACHEFLAG == 1 .OR. tSmallUMat) THEN
!   small system.  only store a single element
            UMATLABELS(ICACHEI, ICACHE) = B
            UMatCacheData(:, ICACHEI, ICACHE) = UMATEL
            ITOTAL = ITOTAL + 1
            RETURN
        end if
        IC1 = ICACHEI
!         write(stdout,*) "ICI",ICACHEI,ICACHE
        OLAB = UMATLABELS(ICACHEI, ICACHE)
!   If we're in a block of prior, fill after
        DO WHILE (OLAB < B .AND. ICACHEI <= NSLOTS)
            UMatCacheData(:, ICACHEI, ICACHE) = UMATEL
            UMATLABELS(ICACHEI, ICACHE) = B
!            IF(ICACHEI.LT.1.OR.ICACHE.LT.1.OR.ICACHEI.GT.NSLOTS.OR.ICACHE.GT.NPAIRS) THEN
!               write(stdout,*) ICACHEI,ICACHE
!               STOP "a"
!            end if
            ICACHEI = ICACHEI + 1
            IF (ICACHEI <= NSLOTS) THEN
                OLAB = UMATLABELS(ICACHEI, ICACHE)
            ELSE
                OLAB = 0
            end if
        end do
        IF (OLAB == 0) ICACHEI = IC1
!        write(stdout,*) "ICI2",ICACHEI,ICACHE
        DO WHILE ((OLAB > B .OR. OLAB == 0) .AND. ICACHEI > 0)
            UMatCacheData(:, ICACHEI, ICACHE) = UMATEL
            UMATLABELS(ICACHEI, ICACHE) = B
!            IF(ICACHEI.LT.1.OR.ICACHE.LT.1.OR.ICACHEI.GT.NSLOTS.OR.ICACHE.GT.NPAIRS) THEN
!               write(stdout,*) ICACHEI,ICACHE
!               STOP "b"
!            end if
            ICACHEI = ICACHEI - 1
            if (icachei > 0) OLAB = UMATLABELS(ICACHEI, ICACHE)
        end do
    END SUBROUTINE CacheUMatEl

END MODULE UMatCache