Parallel.F90 Source File


Source Code

#if !defined(SX)


#include "macros.h"

!
! N.B. HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_int
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_int
        module procedure MPIReduce_auto_int
    end interface


    interface MPISum
        module procedure MPISum_len_int
        module procedure MPISum_auto_int
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_int
        module procedure MPIBcast_len_int
        module procedure MPIBcast_auto_int
        module procedure MPIBcast_logic_int
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_int
        module procedure MPISumAll_auto_int
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_int
        module procedure MPIAllReduce_auto_int
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_int
        module procedure MPIScatter_auto_int
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_int
        module procedure MPIAllGather_auto_int
        module procedure MPIAllGather_auto2_int
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_int
    end interface


    interface MPIGather
        module procedure MPIGather_len_int
        module procedure MPIGather_auto_int
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_int
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_int
        module procedure MPIScatterV_len2_int
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_int
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_int
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_int
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_int
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_int
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_int
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_int
    end interface

    interface MPISum_len
        module procedure MPISum_len_int
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_int
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_int
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_int
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_int
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_int
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_int
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_int
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_int
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_int
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_int
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_int
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_int
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_int
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_int
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_int
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_int
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_int
    end interface

    interface MPIRecv
        module procedure MPIRecv_int
    end interface

    interface MPISend
        module procedure MPISend_int
    end interface

contains
    subroutine MPIReduce_len_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine
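
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module; assumes int32 scalars and that MPI_MAX is available via
    ! MPI_wrapper):
    !
    !     integer(int32) :: local_max, global_max
    !     call MPIReduce(local_max, 1, MPI_MAX, global_max)
    !     ! global_max is valid on the root processor only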



    subroutine MPIReduce_auto_int (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_int (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), returning the results on ALL processors in the array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine
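
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module; assumes MPI_MIN is available via MPI_wrapper):
    !
    !     integer(int32) :: n_local, n_min
    !     call MPIAllReduce(n_local, 1, MPI_MIN, n_min)
    !     ! n_min holds the minimum over all processors, on every processor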



    subroutine MPIAllReduce_auto_int (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of v (and thus Ret) is
        ! determined automagically

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_int(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_int (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int32), intent(in) :: v
        integer(int32), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_int (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int32), intent(in) :: v
        integer(int32), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine
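
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module):
    !
    !     integer(int32) :: n_local, n_total
    !     call MPISumAll(n_local, n_total)
    !     ! n_total holds the sum over all processors, on every processor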



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_int(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v
        integer(int32), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_int(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v
        integer(int32), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine
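
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module):
    !
    !     integer(int32) :: n_local, n_total
    !     call MPISum(n_local, n_total)
    !     ! n_total holds the sum over all processors, on the root only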



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_int (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_int (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_int (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - Optional node specifier; the communicator and root
        !                 are obtained via GetComm
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
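
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module; 'root' is a hypothetical default integer holding the rank of
    ! the broadcasting processor):
    !
    !     integer(int32) :: seed
    !     call MPIBcast(seed, 1, root)   ! lenroot form: 1 element from 'root'
    !     call MPIBcast(seed)            ! auto form: from the default root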



    subroutine MPIBCast_logic_int (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_INTEGER4, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_int (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_int (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER4, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
#if 0 != 0
        Ret = v(1:size(Ret,0))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_int (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine
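
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module; 'nProcessors' is assumed to hold the communicator size):
    !
    !     integer(int32) :: my_count
    !     integer(int32) :: counts(nProcessors)
    !     integer :: ierr
    !     call MPIAllGather(my_count, counts, ierr)
    !     ! counts(i) holds the value contributed by processor i-1, everywhere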


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_int (v, ret, Lengths, Disps, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine
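
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module; lengths(:) and disps(:) are assumed to be MPIArg arrays already
    ! filled with the per-processor element counts and offsets):
    !
    !     integer(int32) :: sendbuf(n_local), recvbuf(n_total)
    !     call MPIAllGatherV(sendbuf, recvbuf, lengths, disps)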




    subroutine MPIGather_len_int (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_int (v, ret, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_int (v, ret, Lengths, Disps, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in v (the send buffer). Each processor should
    ! know its own Length.
    subroutine MPIScatterV_len2_int (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER4, &
                val_out, Length, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:) = v(lbound(v,0+1):&
                  (lbound(v,0+1)+(Length/(1+0))-1))
        ierr = 0
#endif
    end subroutine
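
    ! Illustrative usage sketch (comment only, not part of the generated
    ! module; sendsizes(:), disps(:) and my_len are assumed to be of kind
    ! MPIArg and consistent with the data laid out in sendbuf on the root):
    !
    !     integer(int32) :: sendbuf(n_total), recvbuf(my_len)
    !     integer :: ierr
    !     call MPIScatterV(sendbuf, sendsizes, disps, recvbuf, my_len, ierr)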


    subroutine MPIScatter_len_int (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_int(v, Ret, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_int (Ret, BuffSize, Source, Tag, ierr)

        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_int (v, BuffSize, Dest, Tag, ierr)

        integer(int32), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
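
    ! Illustrative usage sketch for the point-to-point wrappers (comment only,
    ! not part of the generated module; 'iProcIndex' is assumed to hold this
    ! processor's rank, e.g. as provided by MPI_wrapper):
    !
    !     integer(int32) :: payload
    !     integer :: ierr
    !     if (iProcIndex == 0) then
    !         call MPISend(payload, 1, 1, 99, ierr)   ! 1 element to rank 1, tag 99
    !     else if (iProcIndex == 1) then
    !         call MPIRecv(payload, 1, 0, 99, ierr)   ! 1 element from rank 0, tag 99
    !     end if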

end module
#endif




#include "macros.h"

!
! N.B. HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_int64
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_int64
        module procedure MPIReduce_auto_int64
    end interface


    interface MPISum
        module procedure MPISum_len_int64
        module procedure MPISum_auto_int64
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_int64
        module procedure MPIBcast_len_int64
        module procedure MPIBcast_auto_int64
        module procedure MPIBcast_logic_int64
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_int64
        module procedure MPISumAll_auto_int64
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_int64
        module procedure MPIAllReduce_auto_int64
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_int64
        module procedure MPIScatter_auto_int64
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_int64
        module procedure MPIAllGather_auto_int64
        module procedure MPIAllGather_auto2_int64
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_int64
    end interface


    interface MPIGather
        module procedure MPIGather_len_int64
        module procedure MPIGather_auto_int64
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_int64
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_int64
        module procedure MPIScatterV_len2_int64
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_int64
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_int64
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_int64
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_int64
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_int64
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_int64
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_int64
    end interface

    interface MPISum_len
        module procedure MPISum_len_int64
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_int64
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_int64
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_int64
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_int64
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_int64
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_int64
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_int64
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_int64
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_int64
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_int64
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_int64
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_int64
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_int64
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_int64
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_int64
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_int64
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_int64
    end interface

    interface MPIRecv
        module procedure MPIRecv_int64
    end interface

    interface MPISend
        module procedure MPISend_int64
    end interface

contains
    subroutine MPIReduce_len_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_int64 (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), returning the results on ALL processors in the array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_int64 (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of v (and thus Ret) is
        ! determined automagically

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_int64(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_int64 (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int64), intent(in) :: v
        integer(int64), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_int64 (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int64), intent(in) :: v
        integer(int64), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_int64(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v
        integer(int64), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_int64(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v
        integer(int64), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_int64 (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_int64 (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_int64 (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - Optional node specifier; the communicator and root
        !                 are obtained via GetComm
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_int64 (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_INTEGER8, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_int64 (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_int64 (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER8, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
#if 0 != 0
        Ret = v(1:size(Ret,0))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_int64 (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_int64 (v, ret, Lengths, Disps, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_int64 (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_int64 (v, ret, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_int64 (v, ret, Lengths, Disps, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in v (the send buffer). Each processor should
    ! know its own Length.
    subroutine MPIScatterV_len2_int64 (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER8, &
                val_out, Length, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:) = v(lbound(v,0+1):&
                  (lbound(v,0+1)+(Length/(1+0))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_int64 (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_int64(v, Ret, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_int64 (Ret, BuffSize, Source, Tag, ierr)

        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_int64 (v, BuffSize, Dest, Tag, ierr)

        integer(int64), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
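
    ! Illustrative point-to-point usage (a sketch, not part of this module):
    ! pairing MPISend with the matching MPIRecv above. The rank variable
    ! iProcIndex and the tag value 42 are assumptions made for this example
    ! only.
    !
    !     integer(int64) :: payload
    !     integer :: ierr
    !     if (iProcIndex == 0) then
    !         call MPISend (payload, 1, 1, 42, ierr)   ! 1 element to rank 1, tag 42
    !     else if (iProcIndex == 1) then
    !         call MPIRecv (payload, 1, 0, 42, ierr)   ! 1 element from rank 0, tag 42
    !     end if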

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_doub
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_doub
        module procedure MPIReduce_auto_doub
    end interface


    interface MPISum
        module procedure MPISum_len_doub
        module procedure MPISum_auto_doub
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_doub
        module procedure MPIBcast_len_doub
        module procedure MPIBcast_auto_doub
        module procedure MPIBcast_logic_doub
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_doub
        module procedure MPISumAll_auto_doub
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_doub
        module procedure MPIAllReduce_auto_doub
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_doub
        module procedure MPIScatter_auto_doub
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_doub
        module procedure MPIAllGather_auto_doub
        module procedure MPIAllGather_auto2_doub
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_doub
    end interface


    interface MPIGather
        module procedure MPIGather_len_doub
        module procedure MPIGather_auto_doub
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_doub
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_doub
        module procedure MPIScatterV_len2_doub
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_doub
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_doub
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_doub
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_doub
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_doub
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_doub
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_doub
    end interface

    interface MPISum_len
        module procedure MPISum_len_doub
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_doub
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_doub
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_doub
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_doub
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_doub
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_doub
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_doub
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_doub
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_doub
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_doub
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_doub
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_doub
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_doub
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_doub
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_doub
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_doub
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_doub
    end interface

    interface MPIRecv
        module procedure MPIRecv_doub
    end interface

    interface MPISend
        module procedure MPISend_doub
    end interface

contains
    subroutine MPIReduce_len_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with the operation iType on the elements v --> Ret.
        ! The number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine
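
    ! Illustrative usage (a sketch, not part of this module): reducing a
    ! scalar onto the root processor via the auto-length interface. The
    ! variable names are assumptions made for this example only; MPI_MAX is
    ! the usual MPI reduction operation handle.
    !
    !     real(dp) :: local_max, global_max
    !     call MPIReduce (local_max, MPI_MAX, global_max)   ! global_max is valid on root only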



    subroutine MPIReduce_auto_doub (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on v (with length
        ! iLen), returning the result on ALL processors in Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'MPI_Allreduce failed: v =', v, ' ret =', ret
            write(stdout,*) 'F name doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_doub (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of v (and thus Ret)
        ! is determined automatically.

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_doub(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
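
    ! Illustrative usage (a sketch, not part of this module): for a plain
    ! real(dp) value the datatype can be supplied explicitly. The variable
    ! names are assumptions made for this example only.
    !
    !     real(dp) :: x, x_min
    !     call MPIAllReduceDatatype (x, 1, MPI_MIN, MPI_DOUBLE_PRECISION, x_min)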



    subroutine MPISumAll_len_doub (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        real(dp), intent(in) :: v
        real(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine
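
    ! Illustrative usage (a sketch, not part of this module): summing a
    ! per-processor contribution so that every processor ends up with the
    ! total. The variable names are assumptions made for this example only.
    !
    !     real(dp) :: e_local, e_total
    !     call MPISumAll (e_local, e_total)   ! auto-length variant; e_total identical on all ranks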



    subroutine MPISumAll_auto_doub (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automatically.

        real(dp), intent(in) :: v
        real(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        real(dp), intent(inout) :: v
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_doub(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v
        real(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_doub(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v
        real(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        real(dp), intent(inout) :: v
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_doub (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_doub (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_doub (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automatically.
        !
        ! In:    Node   - Optional node to broadcast within (the root is
        !                 obtained from GetComm)
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_doub (v, tMe, Node)

        ! The same as MPIBcast_auto, but the broadcasting (root) processor
        ! is the one that passes tMe = .true.
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_DOUBLE_PRECISION, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
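
    ! Illustrative usage (a sketch, not part of this module): the logical
    ! form of MPIBCast lets the caller flag the sending processor via tMe
    ! instead of naming a root rank. iProcIndex stands in for whatever rank
    ! identifier the caller has available and is an assumption made for
    ! this example only.
    !
    !     real(dp) :: shift
    !     call MPIBCast (shift, iProcIndex == 0)   ! only rank 0 passes tMe = .true.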



    subroutine MPIAlltoAll_doub (v, SendSize, ret, RecvSize, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_doub (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_doub (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_doub (v, ret, Lengths, Disps, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements and lengths, converted to the MPI integer kind
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps, MPIArg)
        LengthsN = int(Lengths, MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(size(v), MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine
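
    ! Illustrative usage (a sketch, not part of this module): gathering
    ! variable-length chunks from every processor into one array that ends
    ! up on all processors. The names chunk, full, lengths, disps and the
    ! way they are filled are assumptions made for this example only.
    !
    !     real(dp), allocatable :: chunk(:), full(:)
    !     integer(MPIArg), allocatable :: lengths(:), disps(:)
    !     ! ... fill lengths with each rank's chunk size and set
    !     !     disps(i) = sum(lengths(1:i-1)), then allocate full ...
    !     call MPIAllGatherV (chunk, full, lengths, disps)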




    subroutine MPIGather_len_doub (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_doub (v, ret, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_doub (v, ret, Lengths, Disps, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements and lengths, converted to the MPI integer kind
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps, MPIArg)
        LengthsN = int(Lengths, MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(size(v), MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the numbers of elements to send to each processor and
    ! Disps are the displacements of those blocks within v (the send
    ! buffer). Each processor must pass the Length it expects to receive.
    subroutine MPIScatterV_len2_doub (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_PRECISION, &
                val_out, Length, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ! Serial build: copy the first Length elements of v into Ret.
        Ret(:) = v(lbound(v,1):lbound(v,1)+Length-1)
        ierr = 0
#endif
    end subroutine
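
    ! Illustrative usage (a sketch, not part of this module): scattering
    ! variable-length blocks of send_buf from the root to every processor.
    ! The names send_buf, recv_buf, send_sizes, disps and my_len are
    ! assumptions made for this example only; send_sizes/disps only need
    ! meaningful values on the root, while every rank passes its own
    ! receive length.
    !
    !     real(dp), allocatable :: send_buf(:), recv_buf(:)
    !     integer(MPIArg), allocatable :: send_sizes(:), disps(:)
    !     integer(MPIArg) :: my_len
    !     integer :: ierr
    !     my_len = int(size(recv_buf), MPIArg)
    !     call MPIScatterV (send_buf, send_sizes, disps, recv_buf, my_len, ierr)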


    subroutine MPIScatter_len_doub (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_doub(v, Ret, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: in the generating template, should the first (send) count be mpilen rather than mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_doub (Ret, BuffSize, Source, Tag, ierr)

        real(dp), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ! Serial build: nothing to receive; this self-assignment is only a placeholder.
        ret = ret
#endif
    end subroutine



    subroutine MPISend_doub (v, BuffSize, Dest, Tag, ierr)

        real(dp), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_comp
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_comp
        module procedure MPIReduce_auto_comp
    end interface


    interface MPISum
        module procedure MPISum_len_comp
        module procedure MPISum_auto_comp
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_comp
        module procedure MPIBcast_len_comp
        module procedure MPIBcast_auto_comp
        module procedure MPIBcast_logic_comp
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_comp
        module procedure MPISumAll_auto_comp
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_comp
        module procedure MPIAllReduce_auto_comp
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_comp
        module procedure MPIScatter_auto_comp
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_comp
        module procedure MPIAllGather_auto_comp
        module procedure MPIAllGather_auto2_comp
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_comp
    end interface


    interface MPIGather
        module procedure MPIGather_len_comp
        module procedure MPIGather_auto_comp
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_comp
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_comp
        module procedure MPIScatterV_len2_comp
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_comp
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_comp
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_comp
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_comp
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_comp
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_comp
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_comp
    end interface

    interface MPISum_len
        module procedure MPISum_len_comp
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_comp
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_comp
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_comp
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_comp
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_comp
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_comp
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_comp
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_comp
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_comp
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_comp
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_comp
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_comp
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_comp
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_comp
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_comp
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_comp
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_comp
    end interface

    interface MPIRecv
        module procedure MPIRecv_comp
    end interface

    interface MPISend
        module procedure MPISend_comp
    end interface

contains
    subroutine MPIReduce_len_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with the operation iType on the elements v --> Ret.
        ! The number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_comp (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on v (with length
        ! iLen), returning the result on ALL processors in Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'MPI_Allreduce failed: v =', v, ' ret =', ret
            write(stdout,*) 'F name comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_comp (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of v (and thus Ret)
        ! is determined automatically.

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_comp(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_comp (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        complex(dp), intent(in) :: v
        complex(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_comp (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automatically.

        complex(dp), intent(in) :: v
        complex(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        complex(dp), intent(inout) :: v
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_comp(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v
        complex(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_comp(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v
        complex(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        complex(dp), intent(inout) :: v
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_comp (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_comp (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_comp (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automatically.
        !
        ! In:    Node   - Optional node to broadcast within (the root is
        !                 obtained from GetComm)
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_comp (v, tMe, Node)

        ! The same as MPIBcast_auto, but the broadcasting (root) processor
        ! is the one that passes tMe = .true.
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_DOUBLE_COMPLEX, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_comp (v, SendSize, ret, RecvSize, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_comp (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_comp (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_comp (v, ret, Lengths, Disps, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements and lengths, converted to the MPI integer kind
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps, MPIArg)
        LengthsN = int(Lengths, MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(size(v), MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_comp (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_comp (v, ret, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_comp (v, ret, Lengths, Disps, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements and lengths, converted to the MPI integer kind
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps, MPIArg)
        LengthsN = int(Lengths, MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(size(v), MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the numbers of elements to send to each processor and
    ! Disps are the displacements of those blocks within v (the send
    ! buffer). Each processor must pass the Length it expects to receive.
    subroutine MPIScatterV_len2_comp (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_COMPLEX, &
                val_out, Length, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ! Serial build: copy the first Length elements of v into Ret.
        Ret(:) = v(lbound(v,1):lbound(v,1)+Length-1)
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_comp (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_comp(v, Ret, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: in the generating template, should the first (send) count be mpilen rather than mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_comp (Ret, BuffSize, Source, Tag, ierr)

        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ! Serial build: nothing to receive; this self-assignment is only a placeholder.
        ret = ret
#endif
    end subroutine



    subroutine MPISend_comp (v, BuffSize, Dest, Tag, ierr)

        complex(dp), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module

#if !defined(SX)


#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr_int
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr_int
        module procedure MPIReduce_auto_arr_int
    end interface


    interface MPISum
        module procedure MPISum_len_arr_int
        module procedure MPISum_auto_arr_int
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr_int
        module procedure MPIBcast_len_arr_int
        module procedure MPIBcast_auto_arr_int
        module procedure MPIBcast_logic_arr_int
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr_int
        module procedure MPISumAll_auto_arr_int
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr_int
        module procedure MPIAllReduce_auto_arr_int
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr_int
        module procedure MPIScatter_auto_arr_int
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr_int
        module procedure MPIAllGather_auto_arr_int
        module procedure MPIAllGather_auto2_arr_int
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr_int
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr_int
        module procedure MPIGather_auto_arr_int
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr_int
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr_int
        module procedure MPIScatterV_len2_arr_int
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr_int
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr_int
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr_int
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr_int
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr_int
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr_int
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr_int
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr_int
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr_int
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr_int
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr_int
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr_int
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr_int
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr_int
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr_int
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr_int
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr_int
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr_int
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr_int
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr_int
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr_int
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr_int
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr_int
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr_int
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr_int
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr_int
    end interface

    interface MPISend
        module procedure MPISend_arr_int
    end interface

contains
    subroutine MPIReduce_len_arr_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with the operation iType on the elements v --> Ret.
        ! The number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI reduction operation (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
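        !
        ! A minimal usage sketch (illustrative only; "local" and "reduced"
        ! are hypothetical caller arrays): element-wise maximum, with the
        ! result available on the root only.
        !
        !     integer(int32) :: local(4), reduced(4)
        !     call MPIReduce(local, 4, MPI_MAX, reduced)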
        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr_int (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr_int (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (of
        ! length iLen), returning the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data
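        !
        ! A minimal usage sketch (illustrative only; hypothetical caller
        ! arrays): every processor receives the element-wise maximum.
        !
        !     integer(int32) :: local(4), gmax(4)
        !     call MPIAllReduce(local, 4, MPI_MAX, gmax)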

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr_int (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr_int(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.
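        !
        ! A minimal usage sketch (illustrative only; localMax and myRank are
        ! hypothetical caller variables): a standard MPI_MAXLOC reduction
        ! over (value, rank) pairs packed as MPI_2INTEGER.  iLen counts
        ! pairs, so a single pair is passed with iLen = 1.
        !
        !     integer(int32) :: pair(2), winner(2)
        !     pair = [localMax, myRank]
        !     call MPIAllReduceDatatype(pair, 1, MPI_MAXLOC, MPI_2INTEGER, winner)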

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr_int (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v
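        !
        ! A minimal usage sketch (illustrative only; hypothetical caller
        ! arrays): after the call every rank holds the global totals.
        !
        !     integer(int32) :: counts(3), totals(3)
        !     call MPISumAll(counts, 3, totals)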

        integer(int32), intent(in) :: v(:)
        integer(int32), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr_int (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int32), intent(in) :: v(:)
        integer(int32), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr_int(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.
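        !
        ! A minimal usage sketch (illustrative only; hypothetical caller
        ! arrays): as MPISumAll, but only the root holds the totals.
        !
        !     integer(int32) :: counts(3), totals(3)
        !     call MPISum(counts, 3, totals)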

        integer(int32), intent(in)  :: v(:)
        integer(int32), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr_int(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:)
        integer(int32), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr_int (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The root processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v
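        !
        ! A minimal usage sketch (illustrative only; nElem and iRoot are
        ! hypothetical caller variables): broadcast buf from processor iRoot
        ! to all processors.
        !
        !     integer(int32) :: buf(nElem)
        !     call MPIBCast(buf, nElem, iRoot)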

        integer(int32), intent(inout), target :: v(:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr_int (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr_int (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - Optionally, the node to broadcast within (the
        !                 root is obtained via GetComm)
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr_int (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v
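        !
        ! A minimal usage sketch (illustrative only; iProcIndex and iSender
        ! are hypothetical caller variables for the current and sending
        ! ranks): the sender passes tMe = .true., everyone else .false.,
        ! and the root is agreed internally via MPIAllreducert.
        !
        !     integer(int32) :: buf(10)
        !     call MPIBCast(buf, iProcIndex == iSender)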

        integer(int32), intent(inout), target :: v(:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                    MPI_INTEGER4, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr_int (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr_int (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER4, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(:) = v(1:size(Ret,1))
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr_int (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr_int (v, ret, Lengths, Disps, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,2)-lbound(v,2)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr_int (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr_int (v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
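    !
    ! A minimal usage sketch (illustrative only; hypothetical caller
    ! variables).  Lengths/Disps count columns per processor; they are
    ! scaled internally by the extent of the leading dimension.
    !
    !     integer(int32) :: local(nRows, nColsLocal), gathered(nRows, nColsTotal)
    !     integer(MPIArg) :: Lengths(nProcs), Disps(nProcs)
    !     integer :: ierr
    !     call MPIGatherV(local, gathered, Lengths, Disps, ierr)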
    subroutine MPIGatherV_auto2_arr_int (v, ret, Lengths, Disps, ierr, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,2)-lbound(v,2)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr_int (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER4, &
                val_out, Length, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ! The length is divided by the dimensionality of the array,
        ! as it is a total number of elements.
        Ret(:,:) = v(:,lbound(v,2):lbound(v,2)+Length/2-1)
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr_int (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr_int(v, Ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: Ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: should the send count be derived from the send buffer v
        !       rather than Ret?
        call MPI_Scatter(val_in, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr_int (Ret, BuffSize, Source, Tag, ierr)

        integer(int32), intent(out), target :: Ret(:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr_int (v, BuffSize, Dest, Tag, ierr)

        integer(int32), intent(in), target :: v(:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module
#endif




#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr_int64
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr_int64
        module procedure MPIReduce_auto_arr_int64
    end interface


    interface MPISum
        module procedure MPISum_len_arr_int64
        module procedure MPISum_auto_arr_int64
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr_int64
        module procedure MPIBcast_len_arr_int64
        module procedure MPIBcast_auto_arr_int64
        module procedure MPIBcast_logic_arr_int64
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr_int64
        module procedure MPISumAll_auto_arr_int64
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr_int64
        module procedure MPIAllReduce_auto_arr_int64
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr_int64
        module procedure MPIScatter_auto_arr_int64
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr_int64
        module procedure MPIAllGather_auto_arr_int64
        module procedure MPIAllGather_auto2_arr_int64
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr_int64
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr_int64
        module procedure MPIGather_auto_arr_int64
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr_int64
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr_int64
        module procedure MPIScatterV_len2_arr_int64
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr_int64
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr_int64
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr_int64
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr_int64
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr_int64
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr_int64
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr_int64
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr_int64
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr_int64
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr_int64
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr_int64
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr_int64
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr_int64
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr_int64
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr_int64
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr_int64
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr_int64
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr_int64
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr_int64
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr_int64
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr_int64
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr_int64
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr_int64
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr_int64
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr_int64
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr_int64
    end interface

    interface MPISend
        module procedure MPISend_arr_int64
    end interface

contains
    subroutine MPIReduce_len_arr_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with the operation iType on the elements v --> Ret.
        ! The number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI reduction operation (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr_int64 (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER8, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (of
        ! length iLen), returning the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr_int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr_int64 (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr_int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr_int64(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr_int64 (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int64), intent(in) :: v(:)
        integer(int64), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr_int64 (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int64), intent(in) :: v(:)
        integer(int64), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr_int64(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v(:)
        integer(int64), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr_int64(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v(:)
        integer(int64), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr_int64 (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The root processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr_int64 (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr_int64 (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - Optionally, the node to broadcast within (the
        !                 root is obtained via GetComm)
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr_int64 (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                    MPI_INTEGER8, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr_int64 (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(out), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr_int64 (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER8, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret(:) = v(1:size(Ret,1))
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr_int64 (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr_int64 (v, ret, Lengths, Disps, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,2)-lbound(v,2)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr_int64 (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr_int64 (v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr_int64 (v, ret, Lengths, Disps, ierr, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,2)-lbound(v,2)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr_int64 (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER8, &
                val_out, Length, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ! The length is divided by the dimensionality of the array,
        ! as it is a total number of elements.
        Ret(:,:) = v(:,lbound(v,2):lbound(v,2)+Length/2-1)
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr_int64 (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr_int64(v, Ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: Ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the send count here be the extent of v (the send
        !       buffer) rather than that of ret?
        call MPI_Scatter(val_in, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr_int64 (Ret, BuffSize, Source, Tag, ierr)

        integer(int64), intent(out), target :: Ret(:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ! Self-assignment keeps the serial build free of warnings about the
        ! otherwise untouched intent(out) argument.
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr_int64 (v, BuffSize, Dest, Tag, ierr)

        integer(int64), intent(in), target :: v(:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr_doub
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr_doub
        module procedure MPIReduce_auto_arr_doub
    end interface


    interface MPISum
        module procedure MPISum_len_arr_doub
        module procedure MPISum_auto_arr_doub
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr_doub
        module procedure MPIBcast_len_arr_doub
        module procedure MPIBcast_auto_arr_doub
        module procedure MPIBcast_logic_arr_doub
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr_doub
        module procedure MPISumAll_auto_arr_doub
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr_doub
        module procedure MPIAllReduce_auto_arr_doub
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr_doub
        module procedure MPIScatter_auto_arr_doub
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr_doub
        module procedure MPIAllGather_auto_arr_doub
        module procedure MPIAllGather_auto2_arr_doub
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr_doub
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr_doub
        module procedure MPIGather_auto_arr_doub
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr_doub
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr_doub
        module procedure MPIScatterV_len2_arr_doub
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr_doub
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr_doub
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr_doub
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr_doub
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr_doub
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr_doub
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr_doub
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr_doub
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr_doub
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr_doub
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr_doub
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr_doub
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr_doub
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr_doub
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr_doub
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr_doub
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr_doub
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr_doub
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr_doub
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr_doub
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr_doub
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr_doub
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr_doub
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr_doub
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr_doub
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr_doub
    end interface

    interface MPISend
        module procedure MPISend_arr_doub
    end interface

contains
    subroutine MPIReduce_len_arr_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        real(dp), intent(in), target :: v(:)
        real(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine
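
    ! Usage sketch (illustrative only; local_max and global_max are assumed
    ! caller-side arrays). Note that the result is defined on the root
    ! processor only:
    !
    !     real(dp) :: local_max(2), global_max(2)
    !     call MPIReduce (local_max, 2, MPI_MAX, global_max)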



    subroutine MPIReduce_auto_arr_doub (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        real(dp), intent(in), target :: v(:)
        real(dp), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        real(dp), intent(in), target :: v(:)
        real(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr_doub (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        real(dp), intent(in), target :: v(:)
        real(dp), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr_doub(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        real(dp), intent(in), target :: v(:)
        real(dp), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
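
    ! Usage sketch (illustrative only): locate the global maximum of a value
    ! and the rank that owns it via the pair type MPI_2DOUBLE_PRECISION with
    ! MPI_MAXLOC. Here local_value and my_rank (this processor's rank) are
    ! assumed caller-side names:
    !
    !     real(dp) :: pair(2), best(2)
    !     pair(1) = local_value
    !     pair(2) = real(my_rank, dp)
    !     call MPIAllReduceDatatype (pair, 1, MPI_MAXLOC, &
    !                                MPI_2DOUBLE_PRECISION, best)
    !     ! best(1) holds the maximum, best(2) the rank that supplied it.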



    subroutine MPISumAll_len_arr_doub (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        real(dp), intent(in) :: v(:)
        real(dp), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr_doub (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        real(dp), intent(in) :: v(:)
        real(dp), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine
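
    ! Usage sketch (illustrative only; local_stats is an assumed caller-side
    ! array): after the call every processor holds the element-wise totals.
    !
    !     real(dp) :: local_stats(3), total_stats(3)
    !     call MPISumAll (local_stats, total_stats)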



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr_doub(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:)
        real(dp), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr_doub(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:)
        real(dp), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr_doub (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! rt (the root) to all processors, where the number of elements in
        ! array v is specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr_doub (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr_doub (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) node set to broadcast within; the root
        !                 processor is obtained from GetComm
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
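
    ! Usage sketch (illustrative only): broadcast input parameters that only
    ! the root processor has set up. Here my_rank, root and read_input_params
    ! are assumed caller-side names:
    !
    !     real(dp) :: params(10)
    !     if (my_rank == root) call read_input_params(params)
    !     call MPIBCast (params)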



    subroutine MPIBCast_logic_arr_doub (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                    MPI_DOUBLE_PRECISION, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
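
    ! Usage sketch (illustrative only): exactly one processor passes
    ! tMe = .true. to mark itself as the sender; my_rank and sending_proc
    ! are assumed caller-side names:
    !
    !     real(dp) :: buffer(100)
    !     call MPIBCast (buffer, my_rank == sending_proc)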



    subroutine MPIAlltoAll_arr_doub (v, SendSize, ret, RecvSize, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr_doub (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
#if 1 != 0
        Ret(:) = v(1:size(Ret,1))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr_doub (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine
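
    ! Usage sketch (illustrative only; nEl and nProcessors are assumed
    ! caller-side values): every processor contributes a fixed-size block
    ! and afterwards holds one column per processor in the receive buffer.
    !
    !     real(dp) :: mine(nEl), everyone(nEl, nProcessors)
    !     integer :: ierr
    !     call MPIAllGather (mine, everyone, ierr)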


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr_doub (v, ret, Lengths, Disps, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,1+1)-lbound(v,1+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr_doub (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr_doub (v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr_doub (v, ret, Lengths, Disps, ierr, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,1+1)-lbound(v,1+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in v (the send buffer). Each processor must
    ! pass its own Length.
    subroutine MPIScatterV_len2_arr_doub (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_PRECISION, &
                val_out, Length, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:) = v(:,lbound(v,1+1):&
                  (lbound(v,1+1)+(Length/(1+1))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr_doub (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr_doub(v, Ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: Ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the send count here be the extent of v (the send
        !       buffer) rather than that of ret?
        call MPI_Scatter(val_in, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr_doub (Ret, BuffSize, Source, Tag, ierr)

        real(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ! Self-assignment keeps the serial build free of warnings about the
        ! otherwise untouched intent(out) argument.
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr_doub (v, BuffSize, Dest, Tag, ierr)

        real(dp), intent(in), target :: v(:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr_comp
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr_comp
        module procedure MPIReduce_auto_arr_comp
    end interface


    interface MPISum
        module procedure MPISum_len_arr_comp
        module procedure MPISum_auto_arr_comp
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr_comp
        module procedure MPIBcast_len_arr_comp
        module procedure MPIBcast_auto_arr_comp
        module procedure MPIBcast_logic_arr_comp
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr_comp
        module procedure MPISumAll_auto_arr_comp
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr_comp
        module procedure MPIAllReduce_auto_arr_comp
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr_comp
        module procedure MPIScatter_auto_arr_comp
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr_comp
        module procedure MPIAllGather_auto_arr_comp
        module procedure MPIAllGather_auto2_arr_comp
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr_comp
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr_comp
        module procedure MPIGather_auto_arr_comp
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr_comp
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr_comp
        module procedure MPIScatterV_len2_arr_comp
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr_comp
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr_comp
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr_comp
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr_comp
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr_comp
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr_comp
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr_comp
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr_comp
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr_comp
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr_comp
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr_comp
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr_comp
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr_comp
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr_comp
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr_comp
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr_comp
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr_comp
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr_comp
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr_comp
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr_comp
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr_comp
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr_comp
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr_comp
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr_comp
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr_comp
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr_comp
    end interface

    interface MPISend
        module procedure MPISend_arr_comp
    end interface

contains
    subroutine MPIReduce_len_arr_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr_comp (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr_comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr_comp (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr_comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr_comp(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr_comp (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        complex(dp), intent(in) :: v(:)
        complex(dp), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr_comp (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        complex(dp), intent(in) :: v(:)
        complex(dp), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr_comp(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v(:)
        complex(dp), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr_comp(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v(:)
        complex(dp), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr_comp (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! rt (the root) to all processors, where the number of elements in
        ! array v is specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr_comp (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr_comp (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) node set to broadcast within; the root
        !                 processor is obtained from GetComm
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr_comp (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                    MPI_DOUBLE_COMPLEX, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr_comp (v, SendSize, ret, RecvSize, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr_comp (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
#if 1 != 0
        Ret(:) = v(1:size(Ret,1))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr_comp (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr_comp (v, ret, Lengths, Disps, Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,1+1)-lbound(v,1+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr_comp (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr_comp (v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr_comp (v, ret, Lengths, Disps, ierr, Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: ret(:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)
        LengthsN = int(Lengths &
         * (ubound(v,1)-lbound(v,1)+1),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int((ubound(v,1)-lbound(v,1)+1) &
         * (ubound(v,1+1)-lbound(v,1+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in v (the send buffer). Each processor must
    ! pass its own Length.
    subroutine MPIScatterV_len2_arr_comp (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_COMPLEX, &
                val_out, Length, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:) = v(:,lbound(v,1+1):&
                  (lbound(v,1+1)+(Length/(1+1))-1))
        ierr = 0
#endif
    end subroutine
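    ! Illustrative usage sketch, not part of the generated template: scatter
    ! contiguous blocks of columns of 'full' from the root to the ranks.
    ! SendSizes and Disps are given in elements (columns times the leading
    ! dimension), and each rank passes the total number of elements it
    ! expects as Length.  The routine and argument names are hypothetical.
    subroutine example_scatterv_arr_comp (full, cols_per_rank, chunk, ierr)
        complex(dp), intent(in) :: full(:,:)             ! significant on root only
        integer(MPIArg), intent(in) :: cols_per_rank(:)  ! one entry per rank
        complex(dp), intent(inout) :: chunk(:,:)         ! this rank's slice
        integer, intent(out) :: ierr
        integer(MPIArg) :: sizes(size(cols_per_rank)), disps(size(cols_per_rank))
        integer :: i

        ! Convert per-rank column counts into element counts and offsets.
        sizes = cols_per_rank * int(size(full, 1), MPIArg)
        disps(1) = 0
        do i = 2, size(cols_per_rank)
            disps(i) = disps(i - 1) + sizes(i - 1)
        end do

        ! Resolves to MPIScatterV_len2_arr_comp above.
        call MPIScatterV (full, sizes, disps, chunk, &
                          int(size(chunk), MPIArg), ierr)
    end subroutine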


    subroutine MPIScatter_len_arr_comp (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr_comp(v, Ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: Ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr_comp (Ret, BuffSize, Source, Tag, ierr)

        complex(dp), intent(out), target :: Ret(:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr_comp (v, BuffSize, Dest, Tag, ierr)

        complex(dp), intent(in), target :: v(:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module

#if !defined(SX)


#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr2_int
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr2_int
        module procedure MPIReduce_auto_arr2_int
    end interface


    interface MPISum
        module procedure MPISum_len_arr2_int
        module procedure MPISum_auto_arr2_int
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr2_int
        module procedure MPIBcast_len_arr2_int
        module procedure MPIBcast_auto_arr2_int
        module procedure MPIBcast_logic_arr2_int
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr2_int
        module procedure MPISumAll_auto_arr2_int
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr2_int
        module procedure MPIAllReduce_auto_arr2_int
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr2_int
        module procedure MPIScatter_auto_arr2_int
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr2_int
        module procedure MPIAllGather_auto_arr2_int
        module procedure MPIAllGather_auto2_arr2_int
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr2_int
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr2_int
        module procedure MPIGather_auto_arr2_int
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr2_int
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr2_int
        module procedure MPIScatterV_len2_arr2_int
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr2_int
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr2_int
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr2_int
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr2_int
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr2_int
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr2_int
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr2_int
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr2_int
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr2_int
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr2_int
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr2_int
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr2_int
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr2_int
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr2_int
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr2_int
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr2_int
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr2_int
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr2_int
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr2_int
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr2_int
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr2_int
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr2_int
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr2_int
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr2_int
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr2_int
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr2_int
    end interface

    interface MPISend
        module procedure MPISend_arr2_int
    end interface

contains
    subroutine MPIReduce_len_arr2_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr2_int (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER4, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
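    ! Illustrative usage sketch, not part of the generated template:
    ! element-wise maximum over all ranks, returned on the root only.  It
    ! assumes MPI_MAX is provided by MPI_wrapper with the same integer kind
    ! as the operation handles used elsewhere in this file; the routine and
    ! argument names are hypothetical.
    subroutine example_reduce_max_arr2_int (local_vals, max_on_root)
        integer(int32), intent(in) :: local_vals(:,:)
        integer(int32), intent(out) :: max_on_root(:,:)  ! same shape as local_vals

        ! Resolves to MPIReduce_auto_arr2_int above.
        call MPIReduce (local_vals, MPI_MAX, max_on_root)
    end subroutine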



    subroutine MPIAllReduce_len_arr2_int (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (of
        ! length iLen), returning the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr2_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr2_int (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automatically.

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr2_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr2_int(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
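    ! Illustrative usage sketch, not part of the generated template: locate
    ! the global maximum of each value together with the rank that owns it,
    ! using the (value, rank) pair convention of MPI_MAXLOC / MPI_2INTEGER.
    ! It assumes both constants are provided by MPI_wrapper with the kind
    ! expected by the dummy arguments above; the routine and argument names
    ! are hypothetical.
    subroutine example_maxloc_arr2_int (pairs, winners)
        ! pairs(1,:) holds the values, pairs(2,:) the owning rank.
        integer(int32), intent(in) :: pairs(:,:)
        integer(int32), intent(out) :: winners(:,:)      ! same shape as pairs

        ! The count passed is the number of (value, rank) pairs.
        call MPIAllReduceDatatype (pairs, size(pairs, 2), MPI_MAXLOC, &
                                   MPI_2INTEGER, winners)
    end subroutine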



    subroutine MPISumAll_len_arr2_int (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int32), intent(in) :: v(:,:)
        integer(int32), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr2_int (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automatically.

        integer(int32), intent(in) :: v(:,:)
        integer(int32), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine
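    ! Illustrative usage sketch, not part of the generated template: sum a
    ! 2D int32 histogram over all ranks so that every rank receives the
    ! total; use MPISum instead if the result is only needed on the root.
    ! The routine and argument names are hypothetical.
    subroutine example_sumall_arr2_int (local_hist, total_hist)
        integer(int32), intent(in) :: local_hist(:,:)
        integer(int32), intent(out) :: total_hist(:,:)   ! same shape as local_hist

        ! Resolves to MPISumAll_auto_arr2_int above.
        call MPISumAll (local_hist, total_hist)
    end subroutine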



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr2_int(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:,:)
        integer(int32), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr2_int(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:,:)
        integer(int32), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr2_int (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr2_int (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr2_int (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automatically.
        !
        ! In:    Node   - Optional node specifier; the broadcast root is
        !                 obtained from GetComm
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
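    ! Illustrative usage sketch, not part of the generated template:
    ! broadcast a 2D int32 table from the root determined by GetComm to all
    ! other ranks.  The routine and argument names are hypothetical.
    subroutine example_bcast_arr2_int (table)
        ! Valid on the root on entry; overwritten everywhere else.
        integer(int32), intent(inout) :: table(:,:)

        ! Resolves to MPIBCast_auto_arr2_int above.
        call MPIBcast (table)
    end subroutine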



    subroutine MPIBCast_logic_arr2_int (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automatically.
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                    MPI_INTEGER4, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr2_int (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(out), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr2_int (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER4, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
#if 2 != 0
        Ret(:,:) = v(:,1:size(Ret,2))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr2_int (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr2_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr2_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr2_int (v, ret, Lengths, Disps, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine
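    ! Illustrative usage sketch, not part of the generated template: gather
    ! a variable number of (n1 x n2) slices from each rank into ret.  Note
    ! that Lengths(i) and Disps(i) count slices along the last dimension;
    ! MPIAllGatherV_auto_arr2_int scales them by n1*n2 internally.  The
    ! routine and argument names are hypothetical.
    subroutine example_allgatherv_arr2_int (slices, ret, nslices_per_rank)
        integer(int32), intent(in) :: slices(:,:,:)
        integer(int32), intent(inout) :: ret(:,:,:)
        integer(MPIArg), intent(in) :: nslices_per_rank(:)   ! one entry per rank
        integer(MPIArg) :: disps(size(nslices_per_rank))
        integer :: i

        ! Displacements, in slices, of each rank's contribution in ret.
        disps(1) = 0
        do i = 2, size(nslices_per_rank)
            disps(i) = disps(i - 1) + nslices_per_rank(i - 1)
        end do

        call MPIAllGatherV (slices, ret, nslices_per_rank, disps)
    end subroutine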




    subroutine MPIGather_len_arr2_int (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr2_int (v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:)
        integer(int32), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr2_int (v, ret, Lengths, Disps, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in v (the send buffer). Each processor must
    ! pass its own Length.
    subroutine MPIScatterV_len2_arr2_int (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER4, &
                val_out, Length, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:) = v(:,:,lbound(v,2+1):&
                  (lbound(v,2+1)+(Length/(1+2))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr2_int (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr2_int(v, Ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr2_int (Ret, BuffSize, Source, Tag, ierr)

        integer(int32), intent(out), target :: Ret(:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr2_int (v, BuffSize, Dest, Tag, ierr)

        integer(int32), intent(in), target :: v(:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
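    ! Illustrative usage sketch, not part of the generated template: a
    ! simple point-to-point exchange in which rank 'src' sends an int32
    ! block to rank 'dst', matched by an MPIRecv on the destination.
    ! BuffSize counts elements, and the calling rank is passed in
    ! explicitly here; all routine and argument names are hypothetical.
    subroutine example_sendrecv_arr2_int (buf, my_rank, src, dst, tag, ierr)
        integer(int32), intent(inout) :: buf(:,:)
        integer, intent(in) :: my_rank, src, dst, tag
        integer, intent(out) :: ierr

        ierr = 0
        if (my_rank == src) then
            call MPISend (buf, size(buf), dst, tag, ierr)
        else if (my_rank == dst) then
            call MPIRecv (buf, size(buf), src, tag, ierr)
        end if
    end subroutine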

end module
#endif




#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr2_int64
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr2_int64
        module procedure MPIReduce_auto_arr2_int64
    end interface


    interface MPISum
        module procedure MPISum_len_arr2_int64
        module procedure MPISum_auto_arr2_int64
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr2_int64
        module procedure MPIBcast_len_arr2_int64
        module procedure MPIBcast_auto_arr2_int64
        module procedure MPIBcast_logic_arr2_int64
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr2_int64
        module procedure MPISumAll_auto_arr2_int64
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr2_int64
        module procedure MPIAllReduce_auto_arr2_int64
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr2_int64
        module procedure MPIScatter_auto_arr2_int64
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr2_int64
        module procedure MPIAllGather_auto_arr2_int64
        module procedure MPIAllGather_auto2_arr2_int64
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr2_int64
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr2_int64
        module procedure MPIGather_auto_arr2_int64
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr2_int64
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr2_int64
        module procedure MPIScatterV_len2_arr2_int64
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr2_int64
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr2_int64
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr2_int64
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr2_int64
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr2_int64
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr2_int64
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr2_int64
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr2_int64
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr2_int64
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr2_int64
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr2_int64
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr2_int64
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr2_int64
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr2_int64
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr2_int64
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr2_int64
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr2_int64
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr2_int64
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr2_int64
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr2_int64
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr2_int64
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr2_int64
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr2_int64
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr2_int64
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr2_int64
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr2_int64
    end interface

    interface MPISend
        module procedure MPISend_arr2_int64
    end interface

contains
    subroutine MPIReduce_len_arr2_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr2_int64 (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER8, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr2_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (of
        ! length iLen), returning the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr2_int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr2_int64 (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automatically.

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr2_int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr2_int64(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr2_int64 (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int64), intent(in) :: v(:,:)
        integer(int64), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr2_int64 (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automatically.

        integer(int64), intent(in) :: v(:,:)
        integer(int64), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr2_int64(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v(:,:)
        integer(int64), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr2_int64(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v(:,:)
        integer(int64), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr2_int64 (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr2_int64 (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr2_int64 (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automatically.
        !
        ! In:    Node   - Optional node specifier; the broadcast root is
        !                 obtained from GetComm
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr2_int64 (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automatically.
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                    MPI_INTEGER8, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr2_int64 (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(out), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr2_int64 (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER8, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
#if 2 != 0
        Ret(:,:) = v(:,1:size(Ret,2))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr2_int64 (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr2_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr2_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr2_int64 (v, ret, Lengths, Disps, Node)

        integer(int64), intent(in), target :: v(:,:,:)
        integer(int64), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr2_int64 (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr2_int64 (v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:)
        integer(int64), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr2_int64 (v, ret, Lengths, Disps, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:)
        integer(int64), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
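    !
    ! Illustrative call pattern (hypothetical names; SendSizes, Disps and the
    ! receive Length are element counts of kind MPIArg, passed straight to
    ! MPI_ScatterV):
    !     integer(int64) :: full(3, 4, ntot), chunk(3, 4, nloc)
    !     call MPIScatterV (full, SendSizes, Disps, chunk, &
    !                       int(size(chunk), MPIArg), ierr)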
    subroutine MPIScatterV_len2_arr2_int64 (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER8, &
                val_out, Length, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:) = v(:,:,lbound(v,2+1):&
                  (lbound(v,2+1)+(Length/(1+2))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr2_int64 (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int64), intent(in), target :: v(:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr2_int64(v, Ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr2_int64 (Ret, BuffSize, Source, Tag, ierr)

        integer(int64), intent(out), target :: Ret(:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr2_int64 (v, BuffSize, Dest, Tag, ierr)

        integer(int64), intent(in), target :: v(:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
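    ! Illustrative pairing of the two point-to-point wrappers above
    ! (hypothetical rank variable my_rank and tag; BuffSize is the total
    ! element count of the buffer):
    !     if (my_rank == 0) then
    !         call MPISend (buf, size(buf), 1, tag, ierr)
    !     else if (my_rank == 1) then
    !         call MPIRecv (buf, size(buf), 0, tag, ierr)
    !     end if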

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr2_doub
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr2_doub
        module procedure MPIReduce_auto_arr2_doub
    end interface


    interface MPISum
        module procedure MPISum_len_arr2_doub
        module procedure MPISum_auto_arr2_doub
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr2_doub
        module procedure MPIBcast_len_arr2_doub
        module procedure MPIBcast_auto_arr2_doub
        module procedure MPIBcast_logic_arr2_doub
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr2_doub
        module procedure MPISumAll_auto_arr2_doub
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr2_doub
        module procedure MPIAllReduce_auto_arr2_doub
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr2_doub
        module procedure MPIScatter_auto_arr2_doub
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr2_doub
        module procedure MPIAllGather_auto_arr2_doub
        module procedure MPIAllGather_auto2_arr2_doub
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr2_doub
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr2_doub
        module procedure MPIGather_auto_arr2_doub
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr2_doub
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr2_doub
        module procedure MPIScatterV_len2_arr2_doub
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr2_doub
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr2_doub
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr2_doub
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr2_doub
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr2_doub
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr2_doub
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr2_doub
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr2_doub
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr2_doub
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr2_doub
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr2_doub
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr2_doub
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr2_doub
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr2_doub
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr2_doub
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr2_doub
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr2_doub
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr2_doub
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr2_doub
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr2_doub
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr2_doub
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr2_doub
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr2_doub
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr2_doub
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr2_doub
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr2_doub
    end interface

    interface MPISend
        module procedure MPISend_arr2_doub
    end interface

contains
    subroutine MPIReduce_len_arr2_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
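        !
        ! Illustrative use via the generic interface (hypothetical names):
        !     real(dp) :: local(2, 5), reduced(2, 5)
        !     call MPIReduce (local, size(local), MPI_MAX, reduced)
        ! after which reduced holds the element-wise maximum, on root only.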
        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr2_doub (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr2_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr2_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr2_doub (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr2_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr2_doub(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.
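        !
        ! Illustrative sketch (hypothetical names): reducing value/index pairs
        ! stored as real(dp) pairs(2, npairs) with MPI_MAXLOC needs the paired
        ! datatype, and iLen counts units of dtype (i.e. pairs), not reals:
        !     call MPIAllReduceDatatype (pairs, npairs, MPI_MAXLOC, &
        !                                MPI_2DOUBLE_PRECISION, best)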

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr2_doub (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v
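        !
        ! Illustrative use (hypothetical names): every rank contributes its
        ! local block and every rank receives the summed result:
        !     real(dp) :: local(nrows, ncols), total(nrows, ncols)
        !     call MPISumAll (local, size(local), total)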

        real(dp), intent(in) :: v(:,:)
        real(dp), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr2_doub (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        real(dp), intent(in) :: v(:,:)
        real(dp), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr2_doub(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:,:)
        real(dp), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr2_doub(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:,:)
        real(dp), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr2_doub (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr2_doub (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr2_doub (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) The node to broadcast within; the root
        !                 processor is determined internally via GetComm
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr2_doub (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v
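        !
        ! Illustrative use (hypothetical names): only the sending rank passes
        ! tMe = .true.; the wrapper then agrees on that rank as root and
        ! broadcasts from it:
        !     call MPIBcast (buf, my_rank == sender)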

        real(dp), intent(inout), target :: v(:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                    MPI_DOUBLE_PRECISION, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr2_doub (v, SendSize, ret, RecvSize, ierr, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr2_doub (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)
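        ! Descriptive note: SendSizes / SendOffsets give, for each destination
        ! rank, how many elements are taken from v and where they start;
        ! RecvSizes / RecvOffsets do the same for the data arriving in Ret, as
        ! in MPI_Alltoallv. All counts and offsets are element counts of kind
        ! MPIArg.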

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
#if 2 != 0
        Ret(:,:) = v(:,1:size(Ret,2))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr2_doub (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr2_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr2_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr2_doub (v, ret, Lengths, Disps, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr2_doub (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr2_doub (v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:)
        real(dp), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr2_doub (v, ret, Lengths, Disps, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr2_doub (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_PRECISION, &
                val_out, Length, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:) = v(:,:,lbound(v,2+1):&
                  (lbound(v,2+1)+(Length/(1+2))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr2_doub (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr2_doub(v, Ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: Ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr2_doub (Ret, BuffSize, Source, Tag, ierr)

        real(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr2_doub (v, BuffSize, Dest, Tag, ierr)

        real(dp), intent(in), target :: v(:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr2_comp
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr2_comp
        module procedure MPIReduce_auto_arr2_comp
    end interface


    interface MPISum
        module procedure MPISum_len_arr2_comp
        module procedure MPISum_auto_arr2_comp
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr2_comp
        module procedure MPIBcast_len_arr2_comp
        module procedure MPIBcast_auto_arr2_comp
        module procedure MPIBcast_logic_arr2_comp
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr2_comp
        module procedure MPISumAll_auto_arr2_comp
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr2_comp
        module procedure MPIAllReduce_auto_arr2_comp
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr2_comp
        module procedure MPIScatter_auto_arr2_comp
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr2_comp
        module procedure MPIAllGather_auto_arr2_comp
        module procedure MPIAllGather_auto2_arr2_comp
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr2_comp
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr2_comp
        module procedure MPIGather_auto_arr2_comp
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr2_comp
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr2_comp
        module procedure MPIScatterV_len2_arr2_comp
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr2_comp
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr2_comp
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr2_comp
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr2_comp
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr2_comp
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr2_comp
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr2_comp
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr2_comp
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr2_comp
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr2_comp
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr2_comp
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr2_comp
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr2_comp
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr2_comp
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr2_comp
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr2_comp
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr2_comp
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr2_comp
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr2_comp
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr2_comp
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr2_comp
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr2_comp
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr2_comp
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr2_comp
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr2_comp
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr2_comp
    end interface

    interface MPISend
        module procedure MPISend_arr2_comp
    end interface

contains
    subroutine MPIReduce_len_arr2_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr2_comp (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr2_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr2_comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr2_comp (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr2_comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr2_comp(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(out), target :: Ret(:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr2_comp (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        complex(dp), intent(in) :: v(:,:)
        complex(dp), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr2_comp (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        complex(dp), intent(in) :: v(:,:)
        complex(dp), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr2_comp(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v(:,:)
        complex(dp), intent(out) :: Ret(:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr2_comp(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v(:,:)
        complex(dp), intent(out) :: Ret(:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr2_comp (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v
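        !
        ! Illustrative use (hypothetical names): broadcast the whole buffer
        ! from an explicit root rank over the global communicator:
        !     call MPIBcast (cbuf, size(cbuf), root)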

        complex(dp), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr2_comp (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr2_comp (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) The node to broadcast within; the root
        !                 processor is determined internally via GetComm
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr2_comp (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                    MPI_DOUBLE_COMPLEX, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr2_comp (v, SendSize, ret, RecvSize, ierr, Node)
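        ! Descriptive note: SendSize and RecvSize are the element counts
        ! exchanged with EACH rank (as in MPI_Alltoall), not totals, so v and
        ! Ret must each provide one such block per process in the communicator.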

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr2_comp (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
#if 2 != 0
        Ret(:,:) = v(:,1:size(Ret,2))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr2_comp (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr2_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr2_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr2_comp (v, ret, Lengths, Disps, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr2_comp (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr2_comp (v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:)
        complex(dp), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
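    !
    ! Illustrative use (a sketch, not part of this module; all names are
    ! hypothetical): gather a different number of (n1 x n2) slices from each
    ! processor onto the root.  lengths(i) is the number of slices sent by
    ! processor i and disps(i) its offset along the last dimension of
    ! recvbuf; both are scaled by the leading dimensions inside the wrapper.
    !
    !   complex(dp) :: sendbuf(n1, n2, nlocal), recvbuf(n1, n2, ntotal)
    !   integer(MPIArg) :: lengths(nprocs), disps(nprocs)
    !   call MPIGatherV(sendbuf, recvbuf, lengths, disps, ierr)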
    subroutine MPIGatherV_auto2_arr2_comp (v, ret, Lengths, Disps, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: ret(:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)) &
         * (ubound(v,2+1)-lbound(v,2+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
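    !
    ! Illustrative use (a sketch, not part of this module; all names are
    ! hypothetical): the root scatters the element ranges of sendbuf
    ! described by sendsizes/disps, and every processor passes the total
    ! element count it expects to receive.  Unlike MPIGatherV above, these
    ! counts and offsets are handed to MPI_ScatterV unscaled, so they are
    ! raw element counts rather than slices of the last dimension.
    !
    !   complex(dp) :: sendbuf(n1, n2, ntotal), recvbuf(n1, n2, nlocal)
    !   integer(MPIArg) :: sendsizes(nprocs), disps(nprocs), recvcount
    !   call MPIScatterV(sendbuf, sendsizes, disps, recvbuf, recvcount, ierr)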
    subroutine MPIScatterV_len2_arr2_comp (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_COMPLEX, &
                val_out, Length, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:) = v(:,:,lbound(v,2+1):&
                  (lbound(v,2+1)+(Length/(1+2))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr2_comp (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr2_comp(v, Ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr2_comp (Ret, BuffSize, Source, Tag, ierr)

        complex(dp), intent(out), target :: Ret(:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr2_comp (v, BuffSize, Dest, Tag, ierr)

        complex(dp), intent(in), target :: v(:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr3_int
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr3_int
        module procedure MPIReduce_auto_arr3_int
    end interface


    interface MPISum
        module procedure MPISum_len_arr3_int
        module procedure MPISum_auto_arr3_int
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr3_int
        module procedure MPIBcast_len_arr3_int
        module procedure MPIBcast_auto_arr3_int
        module procedure MPIBcast_logic_arr3_int
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr3_int
        module procedure MPISumAll_auto_arr3_int
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr3_int
        module procedure MPIAllReduce_auto_arr3_int
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr3_int
        module procedure MPIScatter_auto_arr3_int
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr3_int
        module procedure MPIAllGather_auto_arr3_int
        module procedure MPIAllGather_auto2_arr3_int
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr3_int
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr3_int
        module procedure MPIGather_auto_arr3_int
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr3_int
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr3_int
        module procedure MPIScatterV_len2_arr3_int
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr3_int
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr3_int
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr3_int
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr3_int
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr3_int
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr3_int
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr3_int
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr3_int
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr3_int
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr3_int
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr3_int
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr3_int
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr3_int
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr3_int
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr3_int
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr3_int
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr3_int
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr3_int
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr3_int
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr3_int
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr3_int
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr3_int
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr3_int
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr3_int
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr3_int
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr3_int
    end interface

    interface MPISend
        module procedure MPISend_arr3_int
    end interface

contains
    subroutine MPIReduce_len_arr3_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
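        !
        ! Illustrative use (a sketch, not part of this module; names are
        ! hypothetical): take the element-wise maximum of a 3D integer
        ! array across all processors, leaving the result on the root only.
        !
        !   integer(int32) :: local(na, nb, nc), global(na, nb, nc)
        !   call MPIReduce(local, size(local), MPI_MAX, global)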
        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr3_int (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_INTEGER4, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr3_int (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr3_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr3_int (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of array v (and
        ! thus Ret) is determined automagically

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr3_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr3_int(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.
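        !
        ! Illustrative use (a sketch, not part of this module; names are
        ! hypothetical, and it assumes the default INTEGER kind matches
        ! int32): a combined value/location maximum.  MPI_MAXLOC with
        ! MPI_2INTEGER reduces (value, index) pairs, so iLen counts pairs
        ! and the pairs are packed into the first dimension.
        !
        !   integer(int32) :: pairs(2, nb, nc), best(2, nb, nc)
        !   call MPIAllReduceDatatype(pairs, size(pairs) / 2, MPI_MAXLOC, &
        !                             MPI_2INTEGER, best)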

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr3_int (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v
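        !
        ! Illustrative use (a sketch, not part of this module; names are
        ! hypothetical): sum a 3D integer histogram over all processors so
        ! that every processor ends up with the global total.
        !
        !   integer(int32) :: hist(na, nb, nc), hist_tot(na, nb, nc)
        !   call MPISumAll(hist, size(hist), hist_tot)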

        integer(int32), intent(in) :: v(:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr3_int (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int32), intent(in) :: v(:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr3_int(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.
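        !
        ! Illustrative use (a sketch, not part of this module; names are
        ! hypothetical) - only the root holds the summed result afterwards:
        !
        !   integer(int32) :: hist(na, nb, nc), hist_tot(na, nb, nc)
        !   call MPISum(hist, size(hist), hist_tot)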

        integer(int32), intent(in)  :: v(:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr3_int(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr3_int (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v
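        !
        ! Illustrative use (a sketch, not part of this module; names are
        ! hypothetical) - broadcast a 3D integer array from rank 0:
        !
        !   integer(int32) :: params(na, nb, nc)
        !   call MPIBcast(params, size(params), 0)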

        integer(int32), intent(inout), target :: v(:,:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr3_int (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr3_int (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) The node to broadcast within; the root
        !                 processor is determined via GetComm
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr3_int (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                    MPI_INTEGER4, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr3_int (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr3_int (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER4, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
#if 3 != 0
        Ret(:,:,:) = v(:,:,1:size(Ret,3))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr3_int (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr3_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr3_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr3_int (v, ret, Lengths, Disps, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)) &
         * (ubound(v,3+1)-lbound(v,3+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr3_int (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr3_int (v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr3_int (v, ret, Lengths, Disps, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)) &
         * (ubound(v,3+1)-lbound(v,3+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr3_int (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER4, &
                val_out, Length, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:,:) = v(:,:,:,lbound(v,3+1):&
                  (lbound(v,3+1)+(Length/(1+3))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr3_int (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr3_int(v, Ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr3_int (Ret, BuffSize, Source, Tag, ierr)

        integer(int32), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr3_int (v, BuffSize, Dest, Tag, ierr)

        integer(int32), intent(in), target :: v(:,:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr3_doub
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr3_doub
        module procedure MPIReduce_auto_arr3_doub
    end interface


    interface MPISum
        module procedure MPISum_len_arr3_doub
        module procedure MPISum_auto_arr3_doub
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr3_doub
        module procedure MPIBcast_len_arr3_doub
        module procedure MPIBcast_auto_arr3_doub
        module procedure MPIBcast_logic_arr3_doub
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr3_doub
        module procedure MPISumAll_auto_arr3_doub
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr3_doub
        module procedure MPIAllReduce_auto_arr3_doub
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr3_doub
        module procedure MPIScatter_auto_arr3_doub
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr3_doub
        module procedure MPIAllGather_auto_arr3_doub
        module procedure MPIAllGather_auto2_arr3_doub
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr3_doub
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr3_doub
        module procedure MPIGather_auto_arr3_doub
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr3_doub
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr3_doub
        module procedure MPIScatterV_len2_arr3_doub
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr3_doub
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr3_doub
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr3_doub
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr3_doub
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr3_doub
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr3_doub
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr3_doub
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr3_doub
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr3_doub
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr3_doub
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr3_doub
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr3_doub
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr3_doub
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr3_doub
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr3_doub
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr3_doub
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr3_doub
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr3_doub
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr3_doub
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr3_doub
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr3_doub
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr3_doub
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr3_doub
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr3_doub
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr3_doub
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr3_doub
    end interface

    interface MPISend
        module procedure MPISend_arr3_doub
    end interface

contains
    subroutine MPIReduce_len_arr3_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr3_doub (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr3_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr3_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr3_doub (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of array v (and
        ! thus Ret) is determined automagically

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr3_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr3_doub(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr3_doub (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        real(dp), intent(in) :: v(:,:,:)
        real(dp), intent(out) :: Ret(:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr3_doub (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        real(dp), intent(in) :: v(:,:,:)
        real(dp), intent(out) :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr3_doub(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:,:,:)
        real(dp), intent(out) :: Ret(:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr3_doub(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:,:,:)
        real(dp), intent(out) :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr3_doub (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr3_doub (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr3_doub (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - The (optional) node to broadcast within; the
        !                 root rank is obtained from GetComm
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
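
    ! Usage sketch for the automatic-length MPIBcast variant (hypothetical
    ! names): the element count is taken from the array bounds, so only
    ! the buffer needs to be passed.
    !
    !     real(dp) :: buf(2,2,2)
    !     call MPIBcast(buf)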



    subroutine MPIBCast_logic_arr3_doub (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                    MPI_DOUBLE_PRECISION, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr3_doub (v, SendSize, ret, RecvSize, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine
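
    ! Usage sketch for MPIAlltoAll (hypothetical names and shapes; nranks
    ! stands for the number of MPI processes): each processor exchanges an
    ! equal-sized block with every other processor.  SendSize/RecvSize
    ! count elements per partner, so here each rank exchanges 2*4 = 8
    ! elements with each other rank.
    !
    !     real(dp), allocatable :: sendbuf(:,:,:), recvbuf(:,:,:)
    !     integer :: ierr
    !     allocate(sendbuf(2,4,nranks), recvbuf(2,4,nranks))
    !     call MPIAlltoAll(sendbuf, 8, recvbuf, 8, ierr)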



    subroutine MPIAlltoAllV_arr3_doub (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
#if 3 != 0
        Ret(:,:,:) = v(:,:,1:size(Ret,3))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine
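
    ! Usage sketch for MPIAlltoAllV (hypothetical names): the per-rank
    ! counts and displacements are element counts/offsets and must be of
    ! kind MPIArg, matching the MPI_Alltoallv arguments.
    !
    !     integer(MPIArg) :: ssizes(nranks), soffs(nranks)
    !     integer(MPIArg) :: rsizes(nranks), roffs(nranks)
    !     integer :: ierr
    !     call MPIAlltoAllV(sendbuf, ssizes, soffs, &
    !                       recvbuf, rsizes, roffs, ierr)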



    subroutine MPIAllGather_len_arr3_doub (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr3_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine
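
    ! Usage sketch for the automatic-length MPIAllGather (hypothetical
    ! names): each rank contributes a 3D block, and the blocks are
    ! collected along the extra trailing dimension of the receive buffer
    ! on every processor.
    !
    !     real(dp) :: block(2,3,4)
    !     real(dp), allocatable :: all_blocks(:,:,:,:)
    !     integer :: ierr
    !     allocate(all_blocks(2,3,4,nranks))
    !     call MPIAllGather(block, all_blocks, ierr)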


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr3_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr3_doub (v, ret, Lengths, Disps, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)) &
         * (ubound(v,3+1)-lbound(v,3+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:)
        ierr = 0
#endif
    end subroutine
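
    ! Usage sketch for MPIAllGatherV (hypothetical names).  Lengths and
    ! Disps are counted in slices of the final dimension (one slice is a
    ! full 3D block); the routine scales them by the leading extents
    ! before calling MPI_AllGatherV.
    !
    !     integer(MPIArg) :: lengths(nranks), disps(nranks)
    !     real(dp), allocatable :: mine(:,:,:,:), everything(:,:,:,:)
    !     call MPIAllGatherV(mine, everything, lengths, disps)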




    subroutine MPIGather_len_arr3_doub (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr3_doub (v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr3_doub (v, ret, Lengths, Disps, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)) &
         * (ubound(v,3+1)-lbound(v,3+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:)
        ierr = 0
#endif
    end subroutine
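
    ! Usage sketch for MPIGatherV (hypothetical names): as for
    ! MPIAllGatherV above, Lengths and Disps count slices of the final
    ! dimension, but the gathered data ends up on the root node only.
    !
    !     integer(MPIArg) :: lengths(nranks), disps(nranks)
    !     integer :: ierr
    !     call MPIGatherV(mine, everything, lengths, disps, ierr)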



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr3_doub (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_PRECISION, &
                val_out, Length, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:,:) = v(:,:,:,lbound(v,3+1):&
                  (lbound(v,3+1)+(Length/(1+3))-1))
        ierr = 0
#endif
    end subroutine
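
    ! Usage sketch for MPIScatterV (hypothetical names).  SendSizes and
    ! Disps are total element counts and offsets (kind MPIArg) into the
    ! send buffer on the root; each rank passes the element count it
    ! expects to receive as Length.
    !
    !     integer(MPIArg) :: sendsizes(nranks), disps(nranks), my_len
    !     integer :: ierr
    !     call MPIScatterV(full_data, sendsizes, disps, my_part, my_len, ierr)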


    subroutine MPIScatter_len_arr3_doub (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr3_doub(v, Ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,1)
        ierr = 0
#endif
    end subroutine
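
    ! Usage sketch for the automatic-length MPIScatter (hypothetical
    ! names): the root distributes one 3D block per rank from the
    ! trailing dimension of the send buffer.
    !
    !     real(dp), allocatable :: full(:,:,:,:)
    !     real(dp) :: mine(2,3,4)
    !     integer :: ierr
    !     allocate(full(2,3,4,nranks))
    !     call MPIScatter(full, mine, ierr)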



    subroutine MPIRecv_arr3_doub (Ret, BuffSize, Source, Tag, ierr)

        real(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr3_doub (v, BuffSize, Dest, Tag, ierr)

        real(dp), intent(in), target :: v(:,:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
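
    ! Usage sketch for the point-to-point wrappers (hypothetical names;
    ! iProcIndex stands for this process's rank): BuffSize is the number
    ! of elements, and Tag must match between the two sides.
    !
    !     real(dp) :: work(2,3,4)
    !     integer :: ierr
    !     if (iProcIndex == 0) then
    !         call MPISend(work, size(work), 1, 99, ierr)
    !     else if (iProcIndex == 1) then
    !         call MPIRecv(work, size(work), 0, 99, ierr)
    !     end if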

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr3_comp
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr3_comp
        module procedure MPIReduce_auto_arr3_comp
    end interface


    interface MPISum
        module procedure MPISum_len_arr3_comp
        module procedure MPISum_auto_arr3_comp
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr3_comp
        module procedure MPIBcast_len_arr3_comp
        module procedure MPIBcast_auto_arr3_comp
        module procedure MPIBcast_logic_arr3_comp
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr3_comp
        module procedure MPISumAll_auto_arr3_comp
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr3_comp
        module procedure MPIAllReduce_auto_arr3_comp
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr3_comp
        module procedure MPIScatter_auto_arr3_comp
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr3_comp
        module procedure MPIAllGather_auto_arr3_comp
        module procedure MPIAllGather_auto2_arr3_comp
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr3_comp
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr3_comp
        module procedure MPIGather_auto_arr3_comp
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr3_comp
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr3_comp
        module procedure MPIScatterV_len2_arr3_comp
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr3_comp
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr3_comp
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr3_comp
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr3_comp
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr3_comp
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr3_comp
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr3_comp
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr3_comp
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr3_comp
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr3_comp
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr3_comp
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr3_comp
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr3_comp
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr3_comp
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr3_comp
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr3_comp
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr3_comp
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr3_comp
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr3_comp
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr3_comp
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr3_comp
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr3_comp
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr3_comp
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr3_comp
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr3_comp
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr3_comp
    end interface

    interface MPISend
        module procedure MPISend_arr3_comp
    end interface

contains
    subroutine MPIReduce_len_arr3_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI reduction operation (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr3_comp (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr3_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr3_comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr3_comp (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of array v (and
        ! thus Ret) is determined automagically

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr3_comp'
            write(stdout,*) 'F type MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'F type2 complex(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr3_comp(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(out), target :: Ret(:,:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr3_comp (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        complex(dp), intent(in) :: v(:,:,:)
        complex(dp), intent(out) :: Ret(:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr3_comp (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        complex(dp), intent(in) :: v(:,:,:)
        complex(dp), intent(out) :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr3_comp(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v(:,:,:)
        complex(dp), intent(out) :: Ret(:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr3_comp(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v(:,:,:)
        complex(dp), intent(out) :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr3_comp (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:,:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr3_comp (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr3_comp (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - The (optional) node to broadcast within; the
        !                 root rank is obtained from GetComm
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:,:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr3_comp (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v(:,:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                    MPI_DOUBLE_COMPLEX, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr3_comp (v, SendSize, ret, RecvSize, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr3_comp (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
#if 3 != 0
        Ret(:,:,:) = v(:,:,1:size(Ret,3))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr3_comp (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr3_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr3_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr3_comp (v, ret, Lengths, Disps, Node)

        complex(dp), intent(in), target :: v(:,:,:,:)
        complex(dp), intent(inout), target :: ret(:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)) &
         * (ubound(v,3+1)-lbound(v,3+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr3_comp (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr3_comp (v, ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:)
        complex(dp), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr3_comp (v, ret, Lengths, Disps, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:,:)
        complex(dp), intent(inout), target :: ret(:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)) &
         * (ubound(v,3+1)-lbound(v,3+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr3_comp (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_COMPLEX, &
                val_out, Length, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:,:) = v(:,:,:,lbound(v,3+1):&
                  (lbound(v,3+1)+(Length/(1+3))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr3_comp (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        complex(dp), intent(in), target :: v(:,:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr3_comp(v, Ret, ierr, Node)

        complex(dp), intent(in), target :: v(:,:,:,:)
        complex(dp), intent(inout), target :: Ret(:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)), MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr3_comp (Ret, BuffSize, Source, Tag, ierr)

        complex(dp), intent(out), target :: Ret(:,:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr3_comp (v, BuffSize, Dest, Tag, ierr)

        complex(dp), intent(in), target :: v(:,:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module

#if !defined(SX)


#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr4_int
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr4_int
        module procedure MPIReduce_auto_arr4_int
    end interface


    interface MPISum
        module procedure MPISum_len_arr4_int
        module procedure MPISum_auto_arr4_int
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr4_int
        module procedure MPIBcast_len_arr4_int
        module procedure MPIBcast_auto_arr4_int
        module procedure MPIBcast_logic_arr4_int
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr4_int
        module procedure MPISumAll_auto_arr4_int
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr4_int
        module procedure MPIAllReduce_auto_arr4_int
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr4_int
        module procedure MPIScatter_auto_arr4_int
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr4_int
        module procedure MPIAllGather_auto_arr4_int
        module procedure MPIAllGather_auto2_arr4_int
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr4_int
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr4_int
        module procedure MPIGather_auto_arr4_int
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr4_int
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr4_int
        module procedure MPIScatterV_len2_arr4_int
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr4_int
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr4_int
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr4_int
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr4_int
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr4_int
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr4_int
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr4_int
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr4_int
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr4_int
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr4_int
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr4_int
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr4_int
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr4_int
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr4_int
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr4_int
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr4_int
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr4_int
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr4_int
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr4_int
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr4_int
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr4_int
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr4_int
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr4_int
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr4_int
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr4_int
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr4_int
    end interface

    interface MPISend
        module procedure MPISend_arr4_int
    end interface

contains
    subroutine MPIReduce_len_arr4_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI reduction operation (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr4_int (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER4, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr4_int (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr4_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr4_int (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of array v (and
        ! thus Ret) is determined automagically

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr4_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr4_int(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
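

    ! Illustrative usage sketch (hypothetical): element-wise maximum plus the
    ! rank that owns it, using the paired datatype MPI_2INTEGER with the
    ! MPI_MAXLOC operation. Assumes the first dimension of 'pairs' has extent
    ! 2 and holds (value, owner-rank) with the caller filling in its own rank
    ! beforehand, and that the default INTEGER kind matches int32.
    subroutine example_maxloc_arr4_int (pairs, pairs_max)
        integer(int32), intent(in)  :: pairs(:,:,:,:)
        integer(int32), intent(out) :: pairs_max(:,:,:,:)

        ! The MPI count is the number of (value, rank) pairs.
        call MPIAllReduceDatatype (pairs, size(pairs) / 2, MPI_MAXLOC, &
                                   MPI_2INTEGER, pairs_max)

    end subroutine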



    subroutine MPISumAll_len_arr4_int (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int32), intent(in) :: v(:,:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr4_int (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int32), intent(in) :: v(:,:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine
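

    ! Illustrative usage sketch (hypothetical): sum a local 4-D integer tally
    ! over every process; the summed values are returned on all ranks.
    ! 'tally' and 'tally_tot' are placeholder names.
    subroutine example_sumall_arr4_int (tally, tally_tot)
        integer(int32), intent(in)  :: tally(:,:,:,:)
        integer(int32), intent(out) :: tally_tot(:,:,:,:)

        ! Shape-inferred form; passing size(tally) as a second argument
        ! would select the _len variant instead.
        call MPISumAll (tally, tally_tot)

    end subroutine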



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr4_int(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:,:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr4_int(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:,:,:,:)
        integer(int32), intent(out) :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine
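

    ! Illustrative usage sketch (hypothetical): as the MPISumAll sketch
    ! above, but the summed values are valid on the root rank only; the
    ! other ranks must not read 'tally_tot' afterwards.
    subroutine example_sum_to_root_arr4_int (tally, tally_tot)
        integer(int32), intent(in)  :: tally(:,:,:,:)
        integer(int32), intent(out) :: tally_tot(:,:,:,:)

        call MPISum (tally, tally_tot)

    end subroutine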



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr4_int (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The rank of the root (broadcasting) processor
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:,:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine
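

    ! Illustrative usage sketch (hypothetical): broadcast a 4-D integer table
    ! from rank 0 to every process over the global communicator, giving the
    ! element count and root rank explicitly.
    subroutine example_bcast_from_root0_arr4_int (table)
        integer(int32), intent(inout) :: table(:,:,:,:)

        call MPIBcast (table, size(table), 0)

    end subroutine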



    subroutine MPIBCast_len_arr4_int (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr4_int (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (Optional) node to broadcast within; the root rank
        !                 is obtained via GetComm
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:,:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr4_int (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:,:,:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                    MPI_INTEGER4, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
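

    ! Illustrative usage sketch (hypothetical): broadcast where only the
    ! sending rank passes .true.; the root is then agreed on internally via
    ! MPIAllreducert before the broadcast itself.
    subroutine example_bcast_flagged_arr4_int (buf, i_am_sender)
        integer(int32), intent(inout) :: buf(:,:,:,:)
        logical, intent(in) :: i_am_sender

        call MPIBcast (buf, i_am_sender)

    end subroutine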



    subroutine MPIAlltoAll_arr4_int (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr4_int (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER4, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
#if 4 != 0
        Ret(:,:,:,:) = v(:,:,:,1:size(Ret,4))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr4_int (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr4_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine
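

    ! Illustrative usage sketch (hypothetical): gather one 4-D block per
    ! process into a 5-D receive buffer whose trailing dimension indexes the
    ! contributing rank (extent = number of processes).
    subroutine example_allgather_arr4_int (local_blk, gathered, ierr)
        integer(int32), intent(in)    :: local_blk(:,:,:,:)
        integer(int32), intent(inout) :: gathered(:,:,:,:,:)
        integer, intent(out) :: ierr

        call MPIAllGather (local_blk, gathered, ierr)

    end subroutine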


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr4_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,:,:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr4_int (v, ret, Lengths, Disps, Node)

        integer(int32), intent(in), target :: v(:,:,:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)) &
         * (ubound(v,4+1)-lbound(v,4+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr4_int (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr4_int (v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr4_int (v, ret, Lengths, Disps, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:,:)
        integer(int32), intent(inout), target :: ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)) &
         * (ubound(v,4+1)-lbound(v,4+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:,:)
        ierr = 0
#endif
    end subroutine
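

    ! Illustrative usage sketch (hypothetical): gather a variable number of
    ! trailing-dimension slabs from each rank onto the root. Lengths and
    ! Disps are given in slabs of the last dimension; the wrapper scales them
    ! by the product of the leading extents internally.
    subroutine example_gatherv_arr4_int (local, full, slab_counts, ierr)
        integer(int32), intent(in)    :: local(:,:,:,:,:)
        integer(int32), intent(inout) :: full(:,:,:,:,:)
        integer(MPIArg), intent(in)   :: slab_counts(:)
        integer, intent(out) :: ierr
        integer(MPIArg) :: disps(size(slab_counts))
        integer :: i

        ! Cumulative offsets (in slabs) of each rank's contribution.
        disps(1) = 0
        do i = 2, size(slab_counts)
            disps(i) = disps(i - 1) + slab_counts(i - 1)
        end do

        call MPIGatherV (local, full, slab_counts, disps, ierr)

    end subroutine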



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr4_int (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER4, &
                val_out, Length, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:,:,:) = v(:,:,:,:,lbound(v,4+1):&
                  (lbound(v,4+1)+(Length/(1+4))-1))
        ierr = 0
#endif
    end subroutine
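

    ! Illustrative usage sketch (hypothetical): the root scatters equally
    ! sized slices of a 5-D send buffer. SendSizes, Disps and Length are all
    ! passed as total element counts, and every rank supplies the count it
    ! expects to receive ('nprocs' is a placeholder argument).
    subroutine example_scatterv_arr4_int (send_buf, recv_buf, nprocs, ierr)
        integer(int32), intent(in)    :: send_buf(:,:,:,:,:)
        integer(int32), intent(inout) :: recv_buf(:,:,:,:,:)
        integer, intent(in)  :: nprocs
        integer, intent(out) :: ierr
        integer(MPIArg) :: sizes(nprocs), disps(nprocs)
        integer :: i

        sizes = int(size(recv_buf), MPIArg)
        disps = [(int((i - 1) * size(recv_buf), MPIArg), i = 1, nprocs)]

        call MPIScatterV (send_buf, sizes, disps, recv_buf, &
                          int(size(recv_buf), MPIArg), ierr)

    end subroutine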


    subroutine MPIScatter_len_arr4_int (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int32), intent(in), target :: v(:,:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr4_int(v, Ret, ierr, Node)

        integer(int32), intent(in), target :: v(:,:,:,:,:)
        integer(int32), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr4_int (Ret, BuffSize, Source, Tag, ierr)

        integer(int32), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr4_int (v, BuffSize, Dest, Tag, ierr)

        integer(int32), intent(in), target :: v(:,:,:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module
#endif




#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr4_doub
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr4_doub
        module procedure MPIReduce_auto_arr4_doub
    end interface


    interface MPISum
        module procedure MPISum_len_arr4_doub
        module procedure MPISum_auto_arr4_doub
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr4_doub
        module procedure MPIBcast_len_arr4_doub
        module procedure MPIBcast_auto_arr4_doub
        module procedure MPIBcast_logic_arr4_doub
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr4_doub
        module procedure MPISumAll_auto_arr4_doub
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr4_doub
        module procedure MPIAllReduce_auto_arr4_doub
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr4_doub
        module procedure MPIScatter_auto_arr4_doub
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr4_doub
        module procedure MPIAllGather_auto_arr4_doub
        module procedure MPIAllGather_auto2_arr4_doub
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr4_doub
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr4_doub
        module procedure MPIGather_auto_arr4_doub
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr4_doub
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr4_doub
        module procedure MPIScatterV_len2_arr4_doub
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr4_doub
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr4_doub
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr4_doub
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr4_doub
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr4_doub
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr4_doub
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr4_doub
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr4_doub
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr4_doub
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr4_doub
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr4_doub
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr4_doub
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr4_doub
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr4_doub
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr4_doub
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr4_doub
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr4_doub
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr4_doub
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr4_doub
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr4_doub
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr4_doub
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr4_doub
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr4_doub
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr4_doub
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr4_doub
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr4_doub
    end interface

    interface MPISend
        module procedure MPISend_arr4_doub
    end interface

contains
    subroutine MPIReduce_len_arr4_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_arr4_doub (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr4_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v(:,:,:,:)
        ! (of length iLen), outputting the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr4_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr4_doub (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr4_doub'
            write(stdout,*) 'F type MPI_DOUBLE_PRECISION'
            write(stdout,*) 'F type2 real(dp)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr4_doub(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_arr4_doub (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        real(dp), intent(in) :: v(:,:,:,:)
        real(dp), intent(out) :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr4_doub (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        real(dp), intent(in) :: v(:,:,:,:)
        real(dp), intent(out) :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine
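

    ! Illustrative usage sketch (hypothetical): average a local real(dp)
    ! block over all processes by summing with MPISumAll and dividing by the
    ! number of contributing ranks ('nprocs' is a placeholder argument).
    subroutine example_average_arr4_doub (local, mean, nprocs)
        real(dp), intent(in)  :: local(:,:,:,:)
        real(dp), intent(out) :: mean(:,:,:,:)
        integer, intent(in)   :: nprocs

        call MPISumAll (local, mean)
        mean = mean / real(nprocs, dp)

    end subroutine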



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr4_doub(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:,:,:,:)
        real(dp), intent(out) :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr4_doub(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v(:,:,:,:)
        real(dp), intent(out) :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr4_doub (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The rank of the root (broadcasting) processor
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr4_doub (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr4_doub (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (Optional) node to broadcast within; the root rank
        !                 is obtained via GetComm
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr4_doub (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v(:,:,:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                    MPI_DOUBLE_PRECISION, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_arr4_doub (v, SendSize, ret, RecvSize, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr4_doub (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
#if 4 != 0
        Ret(:,:,:,:) = v(:,:,:,1:size(Ret,4))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr4_doub (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr4_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr4_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,:,:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr4_doub (v, ret, Lengths, Disps, Node)

        real(dp), intent(in), target :: v(:,:,:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)) &
         * (ubound(v,4+1)-lbound(v,4+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:,:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_arr4_doub (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr4_doub (v, ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr4_doub (v, ret, Lengths, Disps, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:,:)
        real(dp), intent(inout), target :: ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)) &
         * (ubound(v,4+1)-lbound(v,4+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr4_doub (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_PRECISION, &
                val_out, Length, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:,:,:) = v(:,:,:,:,lbound(v,4+1):&
                  (lbound(v,4+1)+(Length/(1+4))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_arr4_doub (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        real(dp), intent(in), target :: v(:,:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr4_doub(v, Ret, ierr, Node)

        real(dp), intent(in), target :: v(:,:,:,:,:)
        real(dp), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr4_doub (Ret, BuffSize, Source, Tag, ierr)

        real(dp), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr4_doub (v, BuffSize, Dest, Tag, ierr)

        real(dp), intent(in), target :: v(:,:,:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr4_int64
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr4_int64
        module procedure MPIReduce_auto_arr4_int64
    end interface


    interface MPISum
        module procedure MPISum_len_arr4_int64
        module procedure MPISum_auto_arr4_int64
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr4_int64
        module procedure MPIBcast_len_arr4_int64
        module procedure MPIBcast_auto_arr4_int64
        module procedure MPIBcast_logic_arr4_int64
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr4_int64
        module procedure MPISumAll_auto_arr4_int64
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr4_int64
        module procedure MPIAllReduce_auto_arr4_int64
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr4_int64
        module procedure MPIScatter_auto_arr4_int64
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr4_int64
        module procedure MPIAllGather_auto_arr4_int64
        module procedure MPIAllGather_auto2_arr4_int64
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr4_int64
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr4_int64
        module procedure MPIGather_auto_arr4_int64
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr4_int64
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr4_int64
        module procedure MPIScatterV_len2_arr4_int64
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr4_int64
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr4_int64
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr4_int64
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr4_int64
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr4_int64
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr4_int64
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr4_int64
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr4_int64
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr4_int64
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr4_int64
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr4_int64
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr4_int64
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr4_int64
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr4_int64
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr4_int64
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr4_int64
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr4_int64
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr4_int64
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr4_int64
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr4_int64
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr4_int64
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr4_int64
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr4_int64
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr4_int64
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr4_int64
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr4_int64
    end interface

    interface MPISend
        module procedure MPISend_arr4_int64
    end interface

contains
    subroutine MPIReduce_len_arr4_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with operation iType on the elements v --> Ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI reduction operation (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine
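    ! A minimal usage sketch (hypothetical array names), going through the
    ! generic MPIReduce interface above; the reduced result is valid on the
    ! root processor only:
    !
    !     integer(int64) :: local(2,3,4,5), reduced(2,3,4,5)
    !     local = 1_int64
    !     call MPIReduce(local, MPI_MAX, reduced)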



    subroutine MPIReduce_auto_arr4_int64 (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER8, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr4_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (with
        ! length iLen), outputting the results on ALL processors in the array
        ! Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'MPI_Allreduce failed. v --> Ret:', v, ret
            write(stdout,*) 'F name arr4_int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_arr4_int64 (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of array Value (and
        ! thus Ret) is determined automagically

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr4_int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr4_int64(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(out), target :: Ret(:,:,:,:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
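    ! A minimal sketch of the datatype-explicit form (hypothetical array
    ! names; for this int64 variant the natural datatype is MPI_INTEGER8):
    !
    !     integer(int64) :: part(2,2,2,2), whole(2,2,2,2)
    !     call MPIAllReduceDatatype(part, size(part), MPI_SUM, MPI_INTEGER8, whole)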



    subroutine MPISumAll_len_arr4_int64 (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int64), intent(in) :: v(:,:,:,:)
        integer(int64), intent(out) :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_arr4_int64 (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int64), intent(in) :: v(:,:,:,:)
        integer(int64), intent(out) :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine
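    ! A minimal usage sketch (hypothetical array names): sum a local array
    ! over all processors so that every processor ends up with the total,
    ! with the length deduced automatically:
    !
    !     integer(int64) :: local(2,2,2,2), total(2,2,2,2)
    !     call MPISumAll(local, total)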



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr4_int64(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v(:,:,:,:)
        integer(int64), intent(out) :: Ret(:,:,:,:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr4_int64(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v(:,:,:,:)
        integer(int64), intent(out) :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr4_int64 (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:,:,:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_arr4_int64 (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:,:,:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr4_int64 (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) the node to broadcast within; the root
        !                 used is the one returned by GetComm
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:,:,:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr4_int64 (v, tMe, Node)

        ! The same as MPIBcast_auto, but the broadcasting processor is the
        ! one which sets tMe, rather than the root obtained from GetComm
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v(:,:,:,:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                    MPI_INTEGER8, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
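    ! A minimal sketch of the logical form (hypothetical array name): the
    ! sending processor passes tMe=.true., all others .false., and the root
    ! is then resolved internally via MPIAllreducert:
    !
    !     integer(int64) :: buf(2,2,2,2)
    !     call MPIBcast(buf, iProcIndex == 0)   ! broadcast from processor 0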



    subroutine MPIAlltoAll_arr4_int64 (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr4_int64 (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER8, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
#if 4 != 0
        Ret(:,:,:,:) = v(:,:,:,1:size(Ret,4))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr4_int64 (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:,:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr4_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(inout), target :: ret(:,:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine
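    ! A minimal usage sketch (hypothetical array names): each processor
    ! contributes one rank-4 slab and receives everybody's slabs stacked
    ! along the trailing dimension of the rank-5 receive buffer:
    !
    !     integer(int64) :: slab(2,2,2,2)
    !     integer(int64), allocatable :: slabs(:,:,:,:,:)
    !     integer :: ierr
    !     allocate(slabs(2,2,2,2,nProcessors))
    !     call MPIAllGather(slab, slabs, ierr)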


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_arr4_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(inout), target :: ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(:,:,:,1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_arr4_int64 (v, ret, Lengths, Disps, Node)

        integer(int64), intent(in), target :: v(:,:,:,:,:)
        integer(int64), intent(inout), target :: ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)) &
         * (ubound(v,4+1)-lbound(v,4+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(:,:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:,:)
        ierr = 0
#endif
    end subroutine
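    ! A minimal usage sketch (hypothetical names; nmine and ntotal assumed
    ! known): Lengths and Disps count entries of the final dimension only;
    ! the leading four dimensions are multiplied in internally (DispsN,
    ! LengthsN):
    !
    !     integer(int64) :: mine(2,2,2,2,nmine), gathered(2,2,2,2,ntotal)
    !     integer(MPIArg) :: lengths(nProcessors), disps(nProcessors)
    !     ! ... fill lengths/disps so disps(i+1) = disps(i) + lengths(i) ...
    !     call MPIAllGatherV(mine, gathered, lengths, disps)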




    subroutine MPIGather_len_arr4_int64 (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_arr4_int64 (v, ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer(int64), intent(inout), target :: ret(:,:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,:,1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_arr4_int64 (v, ret, Lengths, Disps, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:,:,:)
        integer(int64), intent(inout), target :: ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)
        LengthsN = int(Lengths &
         * ((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)),MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(((ubound(v,1)-lbound(v,1)+1)*(ubound(v,2)-lbound(v,2)+1)*&
(ubound(v,3)-lbound(v,3)+1)*(ubound(v,4)-lbound(v,4)+1)) &
         * (ubound(v,4+1)-lbound(v,4+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(:,:,:,:,Disps(1)+1:Disps(1)+Lengths(1)) = v(:,:,:,:,:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_arr4_int64 (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:,:,:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER8, &
                val_out, Length, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:,:,:,:,:) = v(:,:,:,:,lbound(v,4+1):&
                  (lbound(v,4+1)+(Length/(1+4))-1))
        ierr = 0
#endif
    end subroutine
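    ! A minimal usage sketch (hypothetical names; nblocks and nlocal assumed
    ! known): the rank-5 send buffer only needs valid data on the root, and
    ! SendSizes, Disps and the local Length are all total element counts:
    !
    !     integer(int64) :: sendbuf(2,2,2,2,nblocks), recvbuf(2,2,2,2,nlocal)
    !     integer(MPIArg) :: sendsizes(nProcessors), disps(nProcessors)
    !     integer :: ierr
    !     call MPIScatterV(sendbuf, sendsizes, disps, recvbuf, &
    !                      int(size(recvbuf), MPIArg), ierr)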


    subroutine MPIScatter_len_arr4_int64 (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int64), intent(in), target :: v(:,:,:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,:,1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_arr4_int64(v, Ret, ierr, Node)

        integer(int64), intent(in), target :: v(:,:,:,:,:)
        integer(int64), intent(inout), target :: Ret(:,:,:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(((ubound(ret,1)-lbound(ret,1)+1)*(ubound(ret,2)-lbound(ret,2)+1)*&
(ubound(ret,3)-lbound(ret,3)+1)*(ubound(ret,4)-lbound(ret,4)+1)), MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(:,:,:,:,1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_arr4_int64 (Ret, BuffSize, Source, Tag, ierr)

        integer(int64), intent(out), target :: Ret(:,:,:,:)
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_arr4_int64 (v, BuffSize, Dest, Tag, ierr)

        integer(int64), intent(in), target :: v(:,:,:,:)
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
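    ! A minimal point-to-point sketch (hypothetical array name): processor 0
    ! sends a rank-4 buffer to processor 1 with a matching tag; BuffSize is
    ! the total element count:
    !
    !     integer(int64) :: buf(2,2,2,2)
    !     integer :: ierr
    !     if (iProcIndex == 0) then
    !         call MPISend(buf, size(buf), 1, 123, ierr)
    !     else if (iProcIndex == 1) then
    !         call MPIRecv(buf, size(buf), 0, 123, ierr)
    !     end if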

end module



module par_internal
#if !defined(SX)
    use par_internal_int
#endif
    use par_internal_int64
    use par_internal_doub
    use par_internal_comp
#if !defined(SX)
    use par_internal_arr_int
#endif
    use par_internal_arr_int64
    use par_internal_arr_doub
    use par_internal_arr_comp
#if !defined(SX)
    use par_internal_arr2_int
#endif
    use par_internal_arr2_int64
    use par_internal_arr2_doub
    use par_internal_arr2_comp
    use par_internal_arr3_int
    use par_internal_arr3_doub
    use par_internal_arr3_comp
#if !defined(SX)
    use par_internal_arr4_int
#endif
    use par_internal_arr4_doub
    use par_internal_arr4_int64
! n.b HACK
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

! MPI interface and helper routines for parallel work.
! If compiled without the USE_MPI C pre-processor symbol, this contains dummy
! routines so that the parallel calculation algorithms can be used in serial.
! This is useful for testing and development, and because (in some cases) the
! parallel algorithms are much more efficient than the serial analogues, the
! latter being much more general.

! NECI will run as a standalone parallel app or in conjunction with CPMD.
! Only standalone is currently implemented.

! For some jobs, parallelization is done over occupied electrons.  These are
! split among the processors such that there are an approximately equal
! number of pairs of electrons on each processor.
!
! Each processor is given a set of 'Electron 1'.  From this it can generate
! a set of single excitations as well as a set of double excitations.  Double
! excitations can have any electron past Electron 1 as Electron 2.  This means
! that the lower electron numbers will have more possible pairs.
!
! Parallelization is supported by the symmetry excitation generators,
! interfaced through GenSymExcitIt3Par.  There is no clean way to automatically
! parallelize high-vertex graph code, so each parallel routine must be
! specifically written.

! FCIMC is parallelized over determinants, using a hashing routine.  This is
! not dealt with here.

! It is highly advised that the following routines are used in parallel work,
! as they exist both in serial and parallel compiles and aid interoperability.
! The extra overhead is likely minimal compared to the communication time.

!
! MPI ROUTINES
!    MPIInit     Setup MPI and init Nodefiles if we're standalone
!    MPIEnd      Shutdown MPI
!    MPIStopAll  Abort all processors
!    MPIBarrier  Wait until all processors have synched.

! These routines exist for all types specified in the header. (Ah the beauty
! of templates!)
!    MPISum      Sum data among all processors, and give the results to all
!    MPISumRoot  Sum data among all processors, and give the results to the
!                root
!    MPIBCast    Send from processor Root to all other processors
!    MPIAllReduceDatatype Like MPIAllReduce, but allows a custom datatype
!                (like MPI_2INTEGER) to be specified
!    MPIAllGather Gathers data from all tasks and sends it to all.
!    MPIRecv     Receive data from a single node

!

! OTHER ROUTINES
!    GetProcElectrons
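
! A minimal standalone usage sketch (names as used elsewhere in this module;
! the integer overload of MPISumAll shown here comes from par_internal_int):
!
!     call MPIInit(.false.)        ! not using an external code's MPI
!     a = iProcIndex + 1
!     call MPISumAll(a, 1, g)      ! g now holds the sum over all processors
!     call MPIEnd(.false.)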


    use MPI_wrapper
    use constants
    implicit none

    interface
        subroutine gethostname(nm, sz) bind(c)
            import :: c_size_t, c_char
            implicit none
            integer(c_size_t), value :: sz
            character(kind=c_char), intent(out) :: nm
        end subroutine
    end interface
    save
    integer :: iProcMinE, iProcMaxE, nProcessors
    logical :: neci_MPIInit_called, neci_MPINodes_called

contains

    subroutine MPIInit (tExternal)
        implicit none
        ! Determine the number of processors, and fork each off to its own
        ! NodeFile output file
        !
        ! In:
        !   tExternal True if using VASP/CPMD's MPI interface, so we don't
        !             have to initialise our own.

        logical, intent(in) :: tExternal
        integer(MPIArg) :: iProcInd, nProcess
        integer(MPIArg) :: ierr
        integer :: a, g
        character(len=20) NodeFile
        logical(MPIArg) :: initialised
        character(*), parameter :: t_r = 'MPIInit'

    if (neci_MPIInit_called) return
    neci_MPIInit_called = .true.
#ifdef USE_MPI
        nNodes = 0  !Indicate we haven't setup nodes yet

        call MPI_Initialized(initialised, ierr)
        if (ierr /= MPI_SUCCESS) call stop_all (t_r, 'MPI Error. Terminating.')

        if (.not. initialised) then
            call MPI_Init (ierr)
            if (ierr /= MPI_SUCCESS) then
                call stop_all (t_r, 'MPI Error. Terminating.')
            end if
        end if

        ! If we are using the C bindings, this just sets it to 0
        CommGlobal = MPI_COMM_WORLD

        call MPI_Comm_rank (CommGlobal, iProcInd, ierr)
        iProcIndex = iProcInd
        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

        call MPI_Comm_size (CommGlobal, nProcess, ierr)

        !initialize intra node and inter node communicators
#ifdef SHARED_MEM_
        !the 'intra' range is the full node for now.
        call MPI_Comm_split_type(CommGlobal,MPI_COMM_TYPE_SHARED,0_MPIArg,MPI_INFO_NULL,mpi_comm_intra,ierr)
        call MPI_Comm_rank(mpi_comm_intra,iProcIndex_intra,ierr)

        call MPI_Comm_split(CommGlobal,iProcIndex_intra,int(iProcIndex,MPIArg),mpi_comm_inter,ierr)
        call MPI_Comm_rank(mpi_comm_inter,iProcIndex_inter,ierr)
#else
! no sharing between tasks, i.e. the 'intra' task range is 1
        mpi_comm_intra=MPI_COMM_SELF
        iProcIndex_intra=0

        mpi_comm_inter=MPI_COMM_WORLD
        iProcIndex_inter=iProcIndex
#endif
        nProcessors = nProcess
        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
        if (iProcIndex == 0) &
            write(stdout,*) 'Number of processors: ', nProcessors

        mpiInfoNull = MPI_INFO_NULL

        if(tExternal) then
            write(stdout,*) "NECI Processor ",iProcIndex+1,'/',nProcessors
        else

!Test if I/O is allowed on all processors - get res, the attribute attached to the communicator concerning I/O
!This does not seem to work...
!      CALL MPI_Comm_get_attr(MPI_COMM_SELF,MPI_IO,res,flag,ierr)
!flag will say if can do I/O
!      Local_IO=(ierr.eq.MPI_SUCCESS.and.flag.and.res.ne.MPI_PROC_NULL)
!      IF(.not.Local_IO) THEN
!          WRITE(stdout,*) ierr,Local_IO,flag,res
!          CALL Stop_All('MPIInit',"IO not possible on this processor")
!      ELSE
!          WRITE(stdout,*) "IO possible on this processor"
!          CALL neci_flush(stdout)
!      ENDIF

            if (iProcIndex == 0) then
                write(stdout,*) "Processor ", iProcIndex+1, '/', nProcessors, &
                           ' as head node.'
            else
#ifdef DEBUG_
                write(stdout,*) "Processor ", iProcIndex+1, '/', nProcessors, &
                           ' moving to local output.'
#endif

                write(NodeFile, &
                    '("NodeFile",' // int_fmt(iProcIndex+1) // ')') &
                    iProcIndex + 1

#ifdef DEBUG_
                write(stdout,*) "outfile=", NodeFile
#endif
                close(stdout, status="keep")
#ifdef DEBUG_
                open(stdout, file=NodeFile, recl=8192)
                write(stdout,*) "Processor ", iProcIndex+1, '/', nProcessors, &
                           ' on local output.'
#else
                open(stdout, file="/dev/null")
#endif
            endif

            call GetProcElectrons (iProcIndex, iProcMinE, iProcMaxE)

            ! Just synchronize everything briefly
            a = iProcIndex+1
            call MPISumAll (a, 1, g)
            write(stdout,*) "Sum: ", g
        endif

        ! Don't bother with this if using c bindings (probably
        ! should though)
        call MPI_ERRHANDLER_SET(CommGlobal,MPI_ERRORS_RETURN,ierr)
#else

       ! Dummy set up for serial work.
       iProcIndex = 0
       nProcessors = 1
       nNodes = 1
#endif

    end subroutine



    ! Create communicators for within and between each of the nodes.
    ! As yet a work in progress, but hopefully the structure is there and
    ! usable.  ParallelHelper.F90 contains various globals for this work.

    ! A parallel job is parallelized over many cores (denoted processors
    ! here). Various parallel architectures make it useful to introduce the
    ! concept of nodes, which are groups of processors with a (fast-access)
    ! shared memory space. These may be physical nodes (in which case all the
    ! cores on all the physical CPUs sharing the same motherboard and memory
    ! would be grouped together into a node), or alternatively may be
    ! subgroups of this set.

    ! Currently nodes are determined as those cores which have the same
    ! hostname. This is not overly portable, and will need to be customized
    ! for different parallel architectures.

    ! Each node has a node root core designated.

    ! With the concept of nodes comes the introduction of different levels
    ! of communicators.
    !
    !   CommGlobal      is a synonym for MPI_COMM_WORLD and communicates
    !                   between all cores
    !   CommNodes(i)    is the communicator for node i (zero-based)
    !   CommRoots       is the communicator between roots of all nodes
    !
    ! Explicit use of these should be avoided, as almost all the MPI...
    ! functions in this module take some optional arguments to make the
    ! communicator selection easier.
    !
    ! e.g.
    !    !The following work for most MPI... functions
    !    integer :: i
    !    MPIBCast(i)            ! This will broadcast with CommGlobal
    !    MPIBCast(i,Node)       ! This will broadcast with the communicator
    !                             within each node
    !    if(bNodeRoot) then
    !       MPIBCast(i,Roots)   ! Broadcast between roots of all nodes.
    !                             Remember the IF, otherwise horrors occur.
    !    endif
    ! (TODO: Perhaps the if should be put inside the MPIBCast?)
    !
    !
    !    !Now for BCast-specific ones.
    !    MPIBCast(i,.true.)     ! Broadcast to all cores from this core
    !    ! MPIBCast(i,.false.)  ! This should be on the other cores
    !                             otherwise more horrors
    !    MPIBCast(i,.true.,Node)!Broadcast to the node from this core
    !    ! etc
    !
    !    Node is of the TYPE(CommI) which allows overloading of these
    !    functions. For actual indices, use
    !      iProcIndex       ! index of this Core.  0...(nProcessors-1)
    !      iNodeIndex       ! index of this node.  0...(nNodes-1)
    !      iIndexInNode     ! index of this core in this node.
    !                         0...(NodeLengths(iNodeIndex)-1)
    !      ProcNode(i)      ! The node which Core i is on
    !
    ! Currently FCIMCPar is set up to allocate one core per node.  This is a
    ! hack for the moment. There's a more sophisticated system of logical
    ! nodes, specified by CALC/LOGICALNODESIZE.
    !
    ! Shared memory has been modified to deal with this.  If shared memory
    ! within only a logical node is required use a form like
    !
    !   call shared_allocate_iluts ("DetList", DetList, (/nIfTot,nMaxAmpl/), &
    !                               iNodeIndex)
    !
    !  where iNodeIndex is the logical node.  Otherwise shared memory is
    !  over physical nodes, irrespective of the logical node structure.

    subroutine MPINodes (tUseProcsAsNodes)

        logical, intent(in) :: tUseProcsAsNodes !Set if we use Procs as Nodes.
!        character(*), parameter :: t_r = 'MPINodes'

        integer :: Group, i, j, n, GroupProc(nProcessors), ierr
        integer(MPIArg) :: GroupDum, GroupRootsDum, CommRootDum
        integer(MPIArg), allocatable :: GroupNodesDum(:), CommNodesDum(:)
        integer, allocatable :: NodesDum(:)
        integer(c_size_t) length
        character(len=30) :: nm, nm2, nms(0:nProcessors-1)

        ! Only initialise once
        if (neci_MPINodes_called) return
        neci_MPINodes_called = .true.

        ! Allocate (temporary) memory
        if(.not.allocated(Nodes)) allocate(Nodes(0:nProcessors-1))
        if(.not.allocated(ProcNode)) allocate(ProcNode(0:nProcessors-1))
        if(.not.allocated(NodeRoots)) allocate(NodeRoots(0:nProcessors-1))
#ifdef USE_MPI
        if(.not.allocated(NodeLengths)) allocate(NodeLengths(0:nProcessors)) !Temp allocate for now
        NodeLengths = 0

        if (iLogicalNodeSize /= 0) &
            write(stdout,*)  "Using Logical Node Size ", iLogicalNodeSize

        if (tUseProcsAsNodes) then
            call MPICommGroup (CommGlobal, GroupDum, ierr)

            Group=GroupDum
            write(stdout,*) "Allocating each processor as a separate node."
            if(iProcIndex==root) then
                nNodes=0
#ifdef DEBUG_
                write(stdout,*) "Processor      Node"
#endif
                do i=0,nProcessors-1
                    NodeRoots(nNodes) = i
                    ProcNode(i) = nNodes
                    Nodes(i)%n = nNodes
                    nNodes = nNodes+1
#ifdef DEBUG_
                    write(stdout,"(2I10)") i, Nodes(i)%n
#endif
                enddo
            endif
        else
            call MPICommGroup(CommGlobal, GroupDum, ierr)
            Group = GroupDum
            length = 30
            call gethostname (nm,length)
            do i=1,30
                if (nm(i:i) == char(0)) then
                    nm(i:30)=' '
                    exit
                endif
            enddo
            nm2 = nm
            !No templated character type

            call MPIGather_hack (nm2, nms(0), 30, nProcessors, ierr)

            write(stdout,*) "Processor      Node hostname"
            if (iProcIndex == root) then
                nNodes = 0
                do i = 0, nProcessors-1
                    Nodes(i)%n = -1
                    do j = 0, nNodes-1
                        if (nms(i) == nms(NodeRoots(j))) then
                            if ((iLogicalNodeSize == 0) .or. &
                                 NodeLengths(j) < iLogicalNodeSize) then
                                Nodes(i)%n = j
                                ProcNode(i) = j
                                NodeLengths(j) = NodeLengths(j)+1
                                exit
                            endif
                        endif
                    enddo
                    if (Nodes(i)%n == -1) then
                        NodeRoots(nNodes) = i
                        Nodes(i)%n = nNodes
                        ProcNode(i) = nNodes
                        NodeLengths(nNodes) = NodeLengths(nNodes)+1
                        nNodes = nNodes+1
                    endif
                    write(stdout,"(2I10,A,A)") i, Nodes(i)%n, " ", nms(i)
                enddo
            endif
        endif
        deallocate(NodeLengths)
        allocate(NodesDum(0:nProcessors-1))

        do i = 0, nProcessors-1
        !Due to type checking, it now doesn't like being passed an array of
        ! CommI's. Revert them to integers, and broadcast
            NodesDum(i) = Nodes(i)%n
        enddo

        call MPIBCast(nNodes)
        call MPIBCast(NodesDum)

        do i = 0, nProcessors-1
            Nodes(i)%n = NodesDum(i)
        enddo
        deallocate(NodesDum)

        call MPIBCast(ProcNode)
        call MPIBCast(NodeRoots)

        if(.not.allocated(CommNodes)) allocate(CommNodes(0:nNodes-1))
        if(.not.allocated(CommNodesDum)) allocate(CommNodesDum(0:nNodes-1))
        if(.not.allocated(GroupNodes)) allocate(GroupNodes(0:nNodes-1))
        if(.not.allocated(GroupNodesDum)) allocate(GroupNodesDum(0:nNodes-1))
        if(.not.allocated(NodeLengths)) allocate(NodeLengths(0:nNodes-1))
        Node = Nodes(iProcIndex)
        iNodeIndex = Node%n  !Used as a plain integer version.
     !   write(stdout,*) iNodeIndex,iProcIndex,Node%n,size(NodeRoots)
     !   write(stdout,*) "***************************************"
     !   call neci_flush(stdout)

        if (iProcIndex == NodeRoots(Node%n)) then
            bNodeRoot = .true.
            write(stdout,*) "I am the node root for node ", Node%n
        else
            bNodeRoot = .false.
        endif
        do i = 0, nNodes-1
            n = 0
            do j = 0, nProcessors-1
                if (Nodes(j)%n == i) then
                    if (j == iProcIndex) iIndexInNode = n
                    n = n + 1
                    GroupProc(n) = j
                endif
            enddo
            NodeLengths(i) = n
            GroupNodesDum = GroupNodes

            ! Create a group
            call MPIGroupIncl (Group, n, GroupProc, GroupNodesDum(i), ierr)

            GroupNodes = GroupNodesDum
            CommNodesDum = int(CommNodes,MPIArg)

            ! Create the communicator
            call MPICommcreate (CommGlobal, GroupNodes(i), CommNodesDum(i),&
                                ierr)
            CommNodes = CommNodesDum
        enddo

        ! Create a group
        GroupRootsDum = GroupRoots
        call MPIGroupIncl(Group, nNodes, NodeRoots, GroupRootsDum, ierr)
        GroupRoots = GroupRootsDum

        ! Create the communicator
        CommRootDum = CommRoot
        call MPICommcreate(CommGlobal, GroupRoots, CommRootDum, ierr)
        CommRoot = CommRootDum
#else
        ! In serial
        nNodes = 1
        iIndexInNode = 0
        ProcNode(0) = 0
        NodeRoots(0) = 0
        bNodeRoot = .true.
        if(.not.allocated(NodeLengths)) allocate(NodeLengths(0:nNodes-1))
        if(.not.allocated(CommNodes)) allocate(CommNodes(0:nNodes-1))
        if(.not.allocated(GroupNodes)) allocate(GroupNodes(0:nNodes-1))
        NodeLengths(0) = 1
#endif /* def USE_MPI */
        Roots%n = -1  !A communicator index between roots
    end subroutine

    subroutine MPIEnd (tExternal)

       ! Shutdown our MPI Interface if we're not using CPMD/VASP's
       !
       ! In:
       !   tExternal Set if using an external program's MPI interface
       !             (currently CPMD or VASP), in which case the external
       !             program handles MPI termination.

       logical, intent(in) :: tExternal
       integer(MPIArg) :: ierr

#ifdef USE_MPI
#ifdef SHARED_MEM_
        call mpi_comm_free(mpi_comm_intra,ierr)
        call mpi_comm_free(mpi_comm_inter,ierr)
#endif
        if(.not.tExternal) then
            call MPI_Finalize(ierr)
        endif
#endif

    end subroutine



    subroutine MPIStopAll(error_code)
        use MolproPlugin, only : MolproPluginTerm
        use fortran_strings, only: str

        !  Abort all processors.
        !
        !  In:
        !     error_code: Error code to return.

        integer, intent(in) :: error_code
        integer(MPIArg) :: ierr

#ifdef USE_MPI
        CALL MolproPluginTerm(-1)

        ! errorcode: Error returned to invoking environment.
        ! ierror: error status (of abort: was abort successful?)
        ! Currently neither are analysed.
        call MPI_Abort(CommGlobal, int(error_code, kind=MPIArg), ierr)
#endif

        CALL neci_flush(stdout)
        stop
    end subroutine



    subroutine GetProcElectrons(iProcIndex,iMinElec,iMaxElec)

        ! Use statement here, so it doesn't get passed onto things which
        ! use Parallel_neci
        use SystemData, only: nel

        ! Choose min and max electrons such that ordered pairs are distributed
        ! evenly across processors
        !
        ! In:
        !    iProcIndex  Index of this processor (starting at 1).
        ! Out:
        !    iMinElec    First electron to allocate to this processor.
        !    iMaxElec    Last electron to allocate to this processor.
        implicit none
        integer iProcIndex,iMinElec,iMaxElec
        real(dp) nCur

#ifdef USE_MPI
        ! Invert X=n(n-1)/2
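        ! (Derivation sketch, assuming the pair-counting convention used in
        !  this routine: electron i contributes nEl-i pairs, so electrons
        !  i..nEl account for m(m-1)/2 pairs with m = nEl-i+1.  Solving
        !  m(m-1)/2 = X gives m = (1+sqrt(1+8X))/2; nCur below holds 2X,
        !  hence the sqrt(1+4*nCur), and the electron index is recovered
        !  as nEl+1-m.)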
        nCur = ((nProcessors+1-iProcIndex)*nEl*(nEl-1.0_dp)/nProcessors)

        nCur = nEl+1-(1+sqrt(1.0_dp+4*nCur))/2
        ! Hitting smack bang on an integer causes problems
        if (ceiling(nCur) == floor(nCur)) &
            nCur=nCur-1e-6_dp
        iMinElec=ceiling(nCur)

        if (iProcIndex == 1) &
            iMinElec=1

        nCur = ((nProcessors-iProcIndex)*nEl*(nEl-1.0_dp)/nProcessors)
        nCur = nEl+1-(1+sqrt(1.0_dp+4*nCur))/2

        ! Hitting smack bang on an integer causes problems
        if (ceiling(nCur) == floor(nCur)) &
            nCur = nCur - 1e-6_dp
        iMaxElec = floor(nCur)

        if (iProcIndex == nProcessors) &
            iMaxElec = nEl
#else
        ! Serial calculation: all electrons on one processor.
        iMinElec = 1
        iMaxElec = nEl
#endif
    end subroutine

    subroutine clean_parallel()
        if(allocated(Nodes)) deallocate(Nodes)
        if(allocated(ProcNode)) deallocate(ProcNode)
        if(allocated(NodeRoots)) deallocate(NodeRoots)
        if(allocated(NodeLengths)) deallocate(NodeLengths)
        if(allocated(CommNodes)) deallocate(CommNodes)
        if(allocated(CommNodesDum)) deallocate(CommNodesDum)
        if(allocated(GroupNodes)) deallocate(GroupNodes)
        if(allocated(GroupNodesDum)) deallocate(GroupNodesDum)
        if(allocated(NodeLengths)) deallocate(NodeLengths)
    end subroutine

end module