Parallel.F90 Source File


Source Code

#if !defined(SX)


#include "macros.h"

!
! N.B. HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_int
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_int
        module procedure MPIReduce_auto_int
    end interface


    interface MPISum
        module procedure MPISum_len_int
        module procedure MPISum_auto_int
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_int
        module procedure MPIBcast_len_int
        module procedure MPIBcast_auto_int
        module procedure MPIBcast_logic_int
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_int
        module procedure MPISumAll_auto_int
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_int
        module procedure MPIAllReduce_auto_int
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_int
        module procedure MPIScatter_auto_int
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_int
        module procedure MPIAllGather_auto_int
        module procedure MPIAllGather_auto2_int
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_int
    end interface


    interface MPIGather
        module procedure MPIGather_len_int
        module procedure MPIGather_auto_int
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_int
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_int
        module procedure MPIScatterV_len2_int
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_int
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_int
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_int
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_int
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_int
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_int
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_int
    end interface

    interface MPISum_len
        module procedure MPISum_len_int
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_int
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_int
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_int
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_int
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_int
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_int
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_int
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_int
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_int
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_int
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_int
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_int
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_int
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_int
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_int
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_int
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_int
    end interface

    interface MPIRecv
        module procedure MPIRecv_int
    end interface

    interface MPISend
        module procedure MPISend_int
    end interface

contains
    subroutine MPIReduce_len_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_int (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
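
    ! Illustrative sketch (not part of the original source): a minimal caller
    ! of the generic MPIReduce interface above, using MPI_MAX (assumed to be
    ! available via MPI_wrapper) so that the maximum over all processors is
    ! returned in global_max on the root processor only. The subroutine and
    ! variable names are assumptions made for this example.
    subroutine example_reduce_max_sketch_int(local_val, global_max)
        integer(int32), intent(in) :: local_val
        integer(int32), intent(out) :: global_max

        ! Resolves to MPIReduce_auto_int; global_max is defined on root only.
        call MPIReduce(local_val, MPI_MAX, global_max)
    end subroutine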



    subroutine MPIAllReduce_len_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with the type iType on the array v (with length
        ! iLen) outputting the results on ALL processors in the array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_int (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of array v (and
        ! thus Ret) is determined automagically

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_int(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_int (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int32), intent(in) :: v
        integer(int32), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_int (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int32), intent(in) :: v
        integer(int32), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine
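
    ! Illustrative sketch (not part of the original source): summing a
    ! per-processor counter across all processors with the generic MPISumAll
    ! interface above; the total becomes available on every processor.
    ! The subroutine and variable names are assumptions made for this example.
    subroutine example_sumall_sketch_int(n_local, n_total)
        integer(int32), intent(in) :: n_local
        integer(int32), intent(out) :: n_total

        ! Resolves to MPISumAll_auto_int, i.e. MPI_Allreduce with MPI_SUM.
        call MPISumAll(n_local, n_total)
    end subroutine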



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_int(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v
        integer(int32), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_int(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v
        integer(int32), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_int (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_int (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_int (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - The (optional) node to broadcast within
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_int (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_INTEGER4, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
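
    ! Illustrative sketch (not part of the original source): broadcasting a
    ! single value from the root processor with the generic MPIBCast interface
    ! above. On entry only root needs iseed to be defined; on exit every
    ! processor holds root's value. The names are assumptions for this example.
    subroutine example_bcast_sketch_int(iseed)
        integer(int32), intent(inout) :: iseed

        ! Resolves to MPIBCast_auto_int (one element; the root rank is taken
        ! from the communicator returned by GetComm).
        call MPIBCast(iseed)
    end subroutine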



    subroutine MPIAlltoAll_int (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_int (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER4, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
#if 0 != 0
        Ret = v(1:size(Ret,0))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_int (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_int (v, ret, Lengths, Disps, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine
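
    ! Illustrative sketch (not part of the original source): the usual
    ! two-step pattern for the MPIAllGatherV interface above. Each rank first
    ! announces its element count via MPIAllGather, the displacements are then
    ! accumulated as a running sum, and the variable-length chunks are
    ! gathered on every processor. nprocs and the other names are assumptions
    ! made for this example.
    subroutine example_allgatherv_sketch_int(my_data, nprocs, all_data)
        integer(int32), intent(in) :: my_data(:)
        integer, intent(in) :: nprocs
        integer(int32), intent(out), allocatable :: all_data(:)
        integer(int32) :: counts(nprocs)
        integer(MPIArg) :: lengths(nprocs), disps(nprocs)
        integer :: i, ierr

        ! Step 1: every rank contributes size(my_data) to counts(:).
        call MPIAllGather(int(size(my_data), int32), counts, ierr)

        ! Step 2: displacements are the running sum of the preceding counts.
        lengths = int(counts, MPIArg)
        disps(1) = 0
        do i = 2, nprocs
            disps(i) = disps(i-1) + lengths(i-1)
        end do

        ! Step 3: gather all chunks on every processor.
        allocate(all_data(sum(counts)))
        call MPIAllGatherV(my_data, all_data, lengths, disps)
    end subroutine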




    subroutine MPIGather_len_int (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_int (v, ret, ierr, Node)

        integer(int32), intent(in), target :: v
        integer(int32), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_int (v, ret, Lengths, Disps, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER4, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in v. Each processor should know its
    ! own Length.
    subroutine MPIScatterV_len2_int (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER4, &
                val_out, Length, &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:) = v(lbound(v,0+1):&
                  (lbound(v,0+1)+(Length/(1+0))-1))
        ierr = 0
#endif
    end subroutine
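
    ! Illustrative sketch (not part of the original source): scattering
    ! variable-sized chunks with the MPIScatterV interface above. chunk_sizes
    ! and the displacements are only significant on the root processor; every
    ! rank passes the length of its own receive buffer. The names are
    ! assumptions made for this example.
    subroutine example_scatterv_sketch_int(all_data, chunk_sizes, my_chunk)
        integer(int32), intent(in) :: all_data(:)        ! significant on root
        integer(MPIArg), intent(in) :: chunk_sizes(:)    ! one entry per rank
        integer(int32), intent(inout) :: my_chunk(:)     ! this rank's chunk
        integer(MPIArg) :: disps(size(chunk_sizes))
        integer :: i, ierr

        ! Displacements into all_data, as a running sum of the chunk sizes.
        disps(1) = 0
        do i = 2, size(chunk_sizes)
            disps(i) = disps(i-1) + chunk_sizes(i-1)
        end do

        call MPIScatterV(all_data, chunk_sizes, disps, my_chunk, &
                         int(size(my_chunk), MPIArg), ierr)
    end subroutine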


    subroutine MPIScatter_len_int (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_int(v, Ret, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_int (Ret, BuffSize, Source, Tag, ierr)

        integer(int32), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_int (v, BuffSize, Dest, Tag, ierr)

        integer(int32), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER4, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
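
    ! Illustrative sketch (not part of the original source): pairing the
    ! MPISend / MPIRecv wrappers above for a single value. Ordering the calls
    ! by rank avoids both partners blocking in MPI_Send at the same time.
    ! my_rank, partner and the tag value are assumptions for this example.
    subroutine example_sendrecv_sketch_int(my_rank, partner, send_val, recv_val)
        integer, intent(in) :: my_rank, partner
        integer(int32), intent(in) :: send_val
        integer(int32), intent(out) :: recv_val
        integer, parameter :: tag = 99
        integer :: ierr

        if (my_rank < partner) then
            call MPISend(send_val, 1, partner, tag, ierr)
            call MPIRecv(recv_val, 1, partner, tag, ierr)
        else
            call MPIRecv(recv_val, 1, partner, tag, ierr)
            call MPISend(send_val, 1, partner, tag, ierr)
        end if
    end subroutine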

end module
#endif




#include "macros.h"

!
! N.B. HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_int64
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_int64
        module procedure MPIReduce_auto_int64
    end interface


    interface MPISum
        module procedure MPISum_len_int64
        module procedure MPISum_auto_int64
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_int64
        module procedure MPIBcast_len_int64
        module procedure MPIBcast_auto_int64
        module procedure MPIBcast_logic_int64
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_int64
        module procedure MPISumAll_auto_int64
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_int64
        module procedure MPIAllReduce_auto_int64
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_int64
        module procedure MPIScatter_auto_int64
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_int64
        module procedure MPIAllGather_auto_int64
        module procedure MPIAllGather_auto2_int64
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_int64
    end interface


    interface MPIGather
        module procedure MPIGather_len_int64
        module procedure MPIGather_auto_int64
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_int64
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_int64
        module procedure MPIScatterV_len2_int64
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_int64
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_int64
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_int64
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_int64
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_int64
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_int64
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_int64
    end interface

    interface MPISum_len
        module procedure MPISum_len_int64
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_int64
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_int64
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_int64
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_int64
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_int64
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_int64
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_int64
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_int64
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_int64
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_int64
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_int64
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_int64
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_int64
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_int64
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_int64
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_int64
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_int64
    end interface

    interface MPIRecv
        module procedure MPIRecv_int64
    end interface

    interface MPISend
        module procedure MPISend_int64
    end interface

contains
    subroutine MPIReduce_len_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_int64 (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_int64 (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with the type iType on the array v (with length
        ! iLen) outputting the results on ALL processors in the array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_int64 (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of array v (and
        ! thus Ret) is determined automagically

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name int64'
            write(stdout,*) 'F type MPI_INTEGER8'
            write(stdout,*) 'F type2 integer(int64)'
            write(stdout,*) 'Opp', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_int64(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_int64 (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int64), intent(in) :: v
        integer(int64), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_int64 (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int64), intent(in) :: v
        integer(int64), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_int64(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v
        integer(int64), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_int64(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int64), intent(in)  :: v
        integer(int64), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_int64 (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER8, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_int64 (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_int64 (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - The (optional) node to broadcast within
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_int64 (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int64), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_INTEGER8, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_int64 (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_int64 (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER8, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
#if 0 != 0
        Ret = v(1:size(Ret,0))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_int64 (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_int64(v, ret, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_int64 (v, ret, Lengths, Disps, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_int64 (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_int64 (v, ret, ierr, Node)

        integer(int64), intent(in), target :: v
        integer(int64), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_int64 (v, ret, Lengths, Disps, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_INTEGER8, &
                val_out, lengthsN, dispsN, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in v. Each processor should know its
    ! own Length.
    subroutine MPIScatterV_len2_int64 (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_INTEGER8, &
                val_out, Length, &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:) = v(lbound(v,0+1):&
                  (lbound(v,0+1)+(Length/(1+0))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_int64 (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER8, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_int64(v, Ret, ierr, Node)

        integer(int64), intent(in), target :: v(:)
        integer(int64), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                val_out, &
                int(1, MPIArg), &
                MPI_INTEGER8, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_int64 (Ret, BuffSize, Source, Tag, ierr)

        integer(int64), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_int64 (v, BuffSize, Dest, Tag, ierr)

        integer(int64), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_INTEGER8, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_doub
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_doub
        module procedure MPIReduce_auto_doub
    end interface


    interface MPISum
        module procedure MPISum_len_doub
        module procedure MPISum_auto_doub
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_doub
        module procedure MPIBcast_len_doub
        module procedure MPIBcast_auto_doub
        module procedure MPIBcast_logic_doub
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_doub
        module procedure MPISumAll_auto_doub
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_doub
        module procedure MPIAllReduce_auto_doub
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_doub
        module procedure MPIScatter_auto_doub
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_doub
        module procedure MPIAllGather_auto_doub
        module procedure MPIAllGather_auto2_doub
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_doub
    end interface


    interface MPIGather
        module procedure MPIGather_len_doub
        module procedure MPIGather_auto_doub
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_doub
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_doub
        module procedure MPIScatterV_len2_doub
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_doub
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_doub
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_doub
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_doub
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_doub
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_doub
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_doub
    end interface

    interface MPISum_len
        module procedure MPISum_len_doub
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_doub
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_doub
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_doub
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_doub
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_doub
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_doub
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_doub
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_doub
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_doub
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_doub
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_doub
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_doub
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_doub
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_doub
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_doub
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_doub
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_doub
    end interface

    interface MPIRecv
        module procedure MPIRecv_doub
    end interface

    interface MPISend
        module procedure MPISend_doub
    end interface

contains
    subroutine MPIReduce_len_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine
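
    ! Usage sketch (illustrative only): reduce a local scalar onto the root
    ! processor with MPI_MAX; 'local' and 'biggest' are hypothetical names.
    !
    !     real(dp) :: local, biggest
    !     call MPIReduce(local, MPI_MAX, biggest)   ! biggest valid on root only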



    subroutine MPIReduce_auto_doub (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_doub (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on v (with length
        ! iLen), outputting the result on ALL processors in Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'MPI_Allreduce failed. v, ret:', v, ret
            write(stdout,*) 'Instance: doub'
            write(stdout,*) 'MPI type: MPI_DOUBLE_PRECISION'
            write(stdout,*) 'Fortran type: real(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine
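
    ! Usage sketch (illustrative only): as above, but the reduced value is
    ! left on every processor.
    !
    !     real(dp) :: local, global_max
    !     call MPIAllReduce(local, MPI_MAX, global_max)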



    subroutine MPIAllReduce_auto_doub (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of v (and
        ! thus Ret) is determined automagically

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'Instance: doub'
            write(stdout,*) 'MPI type: MPI_DOUBLE_PRECISION'
            write(stdout,*) 'Fortran type: real(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_doub(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
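
    ! Usage sketch (illustrative only): an all-reduce with the MPI datatype
    ! passed explicitly.  Here the datatype simply matches real(dp); the
    ! routine exists for cases where it does not (e.g. paired types such as
    ! MPI_2INTEGER used with MPI_MAXLOC).
    !
    !     real(dp) :: local, total
    !     call MPIAllReduceDatatype(local, 1, MPI_SUM, MPI_DOUBLE_PRECISION, total)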



    subroutine MPISumAll_len_doub (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        real(dp), intent(in) :: v
        real(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine
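
    ! Usage sketch (illustrative only): sum a per-processor contribution so
    ! that every processor ends up with the total; names are hypothetical.
    !
    !     real(dp) :: my_energy, total_energy
    !     call MPISumAll(my_energy, total_energy)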



    subroutine MPISumAll_auto_doub (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        real(dp), intent(in) :: v
        real(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_doub(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v
        real(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine
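
    ! Usage sketch (illustrative only): as MPISumAll, but the total is only
    ! guaranteed to be valid on the root processor.
    !
    !     real(dp) :: my_part, total
    !     call MPISum(my_part, total)   ! read 'total' on the root only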



    subroutine MPISum_auto_doub(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        real(dp), intent(in)  :: v
        real(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_doub (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine
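
    ! Usage sketch (illustrative only): broadcast a scalar held on processor 0
    ! to all processors; the generic MPIBcast also provides forms without the
    ! explicit length and root (MPIBCast_len, MPIBCast_auto below).
    !
    !     real(dp) :: x
    !     call MPIBcast(x, 1, 0)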



    subroutine MPIBCast_len_doub (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_doub (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) The node to broadcast within
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_doub (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        real(dp), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_DOUBLE_PRECISION, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
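
    ! Usage sketch (illustrative only): broadcast from whichever processor
    ! passes tMe = .true.; 'my_rank' and 'sending_rank' are hypothetical.
    !
    !     real(dp) :: x
    !     call MPIBcast(x, my_rank == sending_rank)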



    subroutine MPIAlltoAll_doub (v, SendSize, ret, RecvSize, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_doub (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
#if 0 != 0
        Ret = v(1:size(Ret,0))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_doub (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine
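
    ! Usage sketch (illustrative only): gather one value from each processor
    ! into an array with (at least) one element per processor; names are
    ! hypothetical.
    !
    !     real(dp) :: my_val, all_vals(n_procs)
    !     integer :: ierr
    !     call MPIAllGather(my_val, all_vals, ierr)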


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_doub(v, ret, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_doub (v, ret, Lengths, Disps, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine
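
    ! Usage sketch (illustrative only): gather variable-length chunks from all
    ! processors; Lengths(i) and Disps(i) give the count and offset of
    ! processor i's data in the assembled array.  Names are hypothetical.
    !
    !     real(dp) :: my_chunk(n_mine), all_data(n_total)
    !     integer(MPIArg) :: lengths(n_procs), disps(n_procs)
    !     call MPIAllGatherV(my_chunk, all_data, lengths, disps)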




    subroutine MPIGather_len_doub (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_doub (v, ret, ierr, Node)

        real(dp), intent(in), target :: v
        real(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_doub (v, ret, Lengths, Disps, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_PRECISION, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine
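
    ! Usage sketch (illustrative only): as MPIAllGatherV, but the assembled
    ! array is only filled on the root processor.
    !
    !     real(dp) :: my_chunk(n_mine), all_data(n_total)
    !     integer(MPIArg) :: lengths(n_procs), disps(n_procs)
    !     integer :: ierr
    !     call MPIGatherV(my_chunk, all_data, lengths, disps, ierr)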



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_doub (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_PRECISION, &
                val_out, Length, &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:) = v(lbound(v,0+1):&
                  (lbound(v,0+1)+(Length/(1+0))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_doub (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_doub(v, Ret, ierr, Node)

        real(dp), intent(in), target :: v(:)
        real(dp), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_doub (Ret, BuffSize, Source, Tag, ierr)

        real(dp), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_doub (v, BuffSize, Dest, Tag, ierr)

        real(dp), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_PRECISION, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
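
    ! Usage sketch (illustrative only): a matched point-to-point exchange of a
    ! single real(dp) value between two processors using a common tag;
    ! 'my_rank' is hypothetical.
    !
    !     real(dp) :: payload, received
    !     integer :: ierr
    !     if (my_rank == 0) then
    !         call MPISend(payload, 1, 1, 123, ierr)   ! send to rank 1
    !     else if (my_rank == 1) then
    !         call MPIRecv(received, 1, 0, 123, ierr)  ! receive from rank 0
    !     end if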

end module



#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_comp
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_comp
        module procedure MPIReduce_auto_comp
    end interface


    interface MPISum
        module procedure MPISum_len_comp
        module procedure MPISum_auto_comp
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_comp
        module procedure MPIBcast_len_comp
        module procedure MPIBcast_auto_comp
        module procedure MPIBcast_logic_comp
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_comp
        module procedure MPISumAll_auto_comp
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_comp
        module procedure MPIAllReduce_auto_comp
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_comp
        module procedure MPIScatter_auto_comp
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_comp
        module procedure MPIAllGather_auto_comp
        module procedure MPIAllGather_auto2_comp
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_comp
    end interface


    interface MPIGather
        module procedure MPIGather_len_comp
        module procedure MPIGather_auto_comp
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_comp
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_comp
        module procedure MPIScatterV_len2_comp
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_comp
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_comp
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_comp
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_comp
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_comp
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_comp
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_comp
    end interface

    interface MPISum_len
        module procedure MPISum_len_comp
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_comp
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_comp
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_comp
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_comp
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_comp
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_comp
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_comp
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_comp
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_comp
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_comp
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_comp
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_comp
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_comp
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_comp
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_comp
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_comp
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_comp
    end interface

    interface MPIRecv
        module procedure MPIRecv_comp
    end interface

    interface MPISend
        module procedure MPISend_comp
    end interface

contains
    subroutine MPIReduce_len_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE of type iType on the elements v --> ret. The
        ! number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine



    subroutine MPIReduce_auto_comp (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_comp (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on v (with length
        ! iLen), outputting the result on ALL processors in Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'MPI_Allreduce failed. v, ret:', v, ret
            write(stdout,*) 'Instance: comp'
            write(stdout,*) 'MPI type: MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'Fortran type: complex(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_auto_comp (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of v (and
        ! thus Ret) is determined automagically

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'Instance: comp'
            write(stdout,*) 'MPI type: MPI_DOUBLE_COMPLEX'
            write(stdout,*) 'Fortran type: complex(dp)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_comp(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPISumAll_len_comp (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        complex(dp), intent(in) :: v
        complex(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISumAll_auto_comp (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        complex(dp), intent(in) :: v
        complex(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_comp(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified)
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v
        complex(dp), intent(out) :: Ret
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_comp(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node. (Or the node specified). We don't need to specify the
        ! length.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        complex(dp), intent(in)  :: v
        complex(dp), intent(out) :: Ret
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_comp (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_len_comp (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! Root to all processors, where the number of elements in array v is
        ! specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_comp (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In:    Node   - (optional) The node to broadcast within
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_comp (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        complex(dp), intent(inout), target :: v
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int(1, MPIArg), &
                    MPI_DOUBLE_COMPLEX, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIAlltoAll_comp (v, SendSize, ret, RecvSize, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_comp (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: Ret
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
#if 0 != 0
        Ret = v(1:size(Ret,0))
#else
        Ret = v
#endif
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_comp (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto2_comp(v, ret, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                comm, err)
        ierr = err
#else
        !ret(1:%(mpilen)s) = v(1:%(mpilen)s)
        ret = v
        ierr = 0
#endif
    end subroutine


    subroutine MPIAllGatherV_auto_comp (v, ret, Lengths, Disps, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllGatherV'

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_AllGatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                comm, ierr)

        if (ierr /= MPI_SUCCESS) &
                        call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine




    subroutine MPIGather_len_comp (v, SendSize, Ret, RecvSize, ierr, &
                              Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret(1) = v
        ierr = 0
#endif
    end subroutine


    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGather_auto_comp (v, ret, ierr, Node)

        complex(dp), intent(in), target :: v
        complex(dp), intent(inout), target :: ret(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Gather (val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(1) = v
        ierr = 0
#endif
    end subroutine




    ! This gathers an array into another array with the same number of dims.
    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIGatherV_auto2_comp (v, ret, Lengths, Disps, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: ret(:)
        integer(MPIArg), intent(in) :: Disps(:), Lengths(:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

        ! The displacements multiplied up by other array dims
        integer(MPIArg) :: dispsN(ubound(Disps,1)-lbound(Disps,1)+1)
        integer(MPIArg) :: lengthsN(ubound(Lengths,1)-lbound(Lengths,1)+1)
        integer(MPIArg) :: LengthIn

#ifdef USE_MPI
        DispsN = int(Disps &
         * 1,MPIArg)
        LengthsN = int(Lengths &
         * 1,MPIArg)

        call GetComm (Comm, Node, rt)

        LengthIn = int(1 &
         * (ubound(v,0+1)-lbound(v,0+1)+1),MPIArg)

        call MPI_GatherV (val_in, LengthIn, &
                MPI_DOUBLE_COMPLEX, &
                val_out, lengthsN, dispsN, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        ret(Disps(1)+1:Disps(1)+Lengths(1)) = v(:)
        ierr = 0
#endif
    end subroutine



    ! This scatters an array into another array with the same number of dims.
    ! SendSizes are the lengths to send to each proc and Disps are the
    ! displacements of the data in SendBuf.  Each processor should know its
    ! own Length
    subroutine MPIScatterV_len2_comp (v, SendSizes, Disps, Ret, Length, &
                                 ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret(:)
        integer(MPIArg), intent(in) :: SendSizes(:), Disps(:), Length
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_ScatterV (val_in, SendSizes, Disps, &
                MPI_DOUBLE_COMPLEX, &
                val_out, Length, &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        !The length is divided by the dimensionality of
        !the array, as it is a total number of elements.
        Ret(:) = v(lbound(v,0+1):&
                  (lbound(v,0+1)+(Length/(1+0))-1))
        ierr = 0
#endif
    end subroutine


    subroutine MPIScatter_len_comp (v, SendSize, Ret, RecvSize, ierr, &
                               Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Scatter (val_in, int(SendSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, int(RecvSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    subroutine MPIScatter_auto_comp(v, Ret, ierr, Node)

        complex(dp), intent(in), target :: v(:)
        complex(dp), intent(inout), target :: Ret
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, err

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        ! TODO: Should the first of these be mpilen not mpilen2?
        call MPI_Scatter(val_in, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                val_out, &
                int(1, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                rt, comm, err)
        ierr = err
#else
        Ret = v(1)
        ierr = 0
#endif
    end subroutine



    subroutine MPIRecv_comp (Ret, BuffSize, Source, Tag, ierr)

        complex(dp), intent(out), target :: Ret
        integer, intent(in) :: BuffSize, tag, source
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: stat(MPI_STATUS_SIZE), err
        call MPI_Recv (val_out, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Source, MPIArg), int(Tag, MPIArg), CommGlobal, &
                stat, err)
        ierr = err
#else
        ierr = 0
        ret = ret
#endif
    end subroutine



    subroutine MPISend_comp (v, BuffSize, Dest, Tag, ierr)

        complex(dp), intent(in), target :: v
        integer, intent(in) :: BuffSize, tag, dest
        integer, intent(out) :: ierr

#ifdef USE_MPI
        integer(MPIArg) :: err
        call MPI_Send (val_in, int(BuffSize, MPIArg), &
                MPI_DOUBLE_COMPLEX, &
                int(Dest, MPIArg), int(Tag, MPIArg), CommGlobal, err)
        ierr = err
#else
        ierr = 0
#endif
    end subroutine
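
    ! A minimal usage sketch of pairing the MPISend/MPIRecv wrappers above.
    ! The rank numbers, the tag value (42) and the buffer name are
    ! illustrative assumptions; iProcIndex is the rank of this process.
    !
    !    complex(dp) :: payload
    !    integer :: ierr
    !    if (iProcIndex == 0) then
    !        call MPISend (payload, 1, 1, 42, ierr)   ! 1 element to rank 1
    !    else if (iProcIndex == 1) then
    !        call MPIRecv (payload, 1, 0, 42, ierr)   ! 1 element from rank 0
    !    end if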

end module

#endif

#if !defined(SX)


#include "macros.h"

!
! n.b HACK
! We need to be able to do a bit of hackery when using C-based MPI
!
! --> We relabel things a bit...
#define val_in v
#define val_out Ret

module par_internal_arr_int
    use MPI_wrapper
    use CalcData, only: iLogicalNodeSize
    use, intrinsic :: iso_c_binding, only: c_size_t, c_char
    use constants
    use fmt_utils
    use error_handling_neci, only: stop_all, neci_flush
    implicit none

    interface MPIReduce
        module procedure MPIReduce_len_arr_int
        module procedure MPIReduce_auto_arr_int
    end interface


    interface MPISum
        module procedure MPISum_len_arr_int
        module procedure MPISum_auto_arr_int
    end interface


    interface MPIBcast
        module procedure MPIBcast_lenroot_arr_int
        module procedure MPIBcast_len_arr_int
        module procedure MPIBcast_auto_arr_int
        module procedure MPIBcast_logic_arr_int
    end interface


    interface MPISumAll
        module procedure MPISumAll_len_arr_int
        module procedure MPISumAll_auto_arr_int
    end interface


    interface MPIAllReduce
        module procedure MPIAllReduce_len_arr_int
        module procedure MPIAllReduce_auto_arr_int
    end interface


    interface MPIScatter
        module procedure MPIScatter_len_arr_int
        module procedure MPIScatter_auto_arr_int
    end interface


    interface MPIAllGather
        module procedure MPIAllGather_len_arr_int
        module procedure MPIAllGather_auto_arr_int
        module procedure MPIAllGather_auto2_arr_int
    end interface


    interface MPIAllGatherV
        module procedure MPIAllGatherV_auto_arr_int
    end interface


    interface MPIGather
        module procedure MPIGather_len_arr_int
        module procedure MPIGather_auto_arr_int
    end interface


    interface MPIGatherV
        module procedure MPIGatherV_auto2_arr_int
    end interface


    interface MPIScatterV
        !module procedure MPIScatterV_len_arr_int
        module procedure MPIScatterV_len2_arr_int
    end interface

    interface MPIReduce_len
        module procedure MPIReduce_len_arr_int
    end interface

    interface MPIReduce_auto
        module procedure MPIReduce_auto_arr_int
    end interface

    interface MPIAllReduce_len
        module procedure MPIAllReduce_len_arr_int
    end interface

    interface MPIAllReduce_auto
        module procedure MPIAllReduce_auto_arr_int
    end interface

    interface MPIAllReduceDatatype
        module procedure MPIAllReduceDatatype_arr_int
    end interface

    interface MPISumAll_len
        module procedure MPISumAll_len_arr_int
    end interface

    interface MPISumAll_auto
        module procedure MPISumAll_auto_arr_int
    end interface

    interface MPISum_len
        module procedure MPISum_len_arr_int
    end interface

    interface MPISum_auto
        module procedure MPISum_auto_arr_int
    end interface

    interface MPIBCast_lenroot
        module procedure MPIBCast_lenroot_arr_int
    end interface

    interface MPIBCast_len
        module procedure MPIBCast_len_arr_int
    end interface

    interface MPIBCast_auto
        module procedure MPIBCast_auto_arr_int
    end interface

    interface MPIBCast_logic
        module procedure MPIBCast_logic_arr_int
    end interface

    interface MPIAlltoAll
        module procedure MPIAlltoAll_arr_int
    end interface

    interface MPIAlltoAllV
        module procedure MPIAlltoAllV_arr_int
    end interface

    interface MPIAllGather_len
        module procedure MPIAllGather_len_arr_int
    end interface

    interface MPIAllGather_auto
        module procedure MPIAllGather_auto_arr_int
    end interface

    interface MPIAllGather_auto2
        module procedure MPIAllGather_auto2_arr_int
    end interface

    interface MPIAllGatherV_auto
        module procedure MPIAllGatherV_auto_arr_int
    end interface

    interface MPIGather_len
        module procedure MPIGather_len_arr_int
    end interface

    interface MPIGather_auto
        module procedure MPIGather_auto_arr_int
    end interface

    interface MPIGatherV_auto2
        module procedure MPIGatherV_auto2_arr_int
    end interface

    interface MPIScatterV_len2
        module procedure MPIScatterV_len2_arr_int
    end interface

    interface MPIScatter_len
        module procedure MPIScatter_len_arr_int
    end interface

    interface MPIScatter_auto
        module procedure MPIScatter_auto_arr_int
    end interface

    interface MPIRecv
        module procedure MPIRecv_arr_int
    end interface

    interface MPISend
        module procedure MPISend_arr_int
    end interface

contains
    subroutine MPIReduce_len_arr_int (v, iLen, iType, Ret, Node)

        ! Call MPI_REDUCE with the operation iType on the elements v --> Ret.
        ! The number of elements to transmit is specified by iLen.
        !
        ! In:  v - The elements to be reduced over the processors.
        !      iLen   - The length of the data (in elements of its type)
        !      iType  - MPI specification (e.g. MPI_MAX)
        ! Out: Ret    - The reduced elements are returned in this array
        !               *** ON ROOT ONLY ***
        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, rt, ierr
        character(*), parameter :: t_r = "MPIReduce"

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                iType, rt, Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        Ret = v
#endif
    end subroutine
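
    ! A minimal usage sketch, assuming 4-element arrays: the reduced result
    ! is only defined on the root process; other ranks must not rely on the
    ! contents of 'reduced'. Names are illustrative only.
    !
    !    integer(int32) :: local(4), reduced(4)
    !    local = int(iProcIndex, int32)
    !    call MPIReduce (local, 4, MPI_MAX, reduced)
    !    ! on root, 'reduced' now holds the element-wise maximum over ranks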



    subroutine MPIReduce_auto_arr_int (v, iType, Ret, Node)

        ! The same as MPIReduce_len, without the iLen specification. The
        ! number of elements is determined automatically.

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) ::  comm, rt, ierr
        character(*), parameter :: t_r = 'MPIReduce'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt)

        call MPI_Reduce(val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, iType, rt, &
                Comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine



    subroutine MPIAllReduce_len_arr_int (v, iLen, iType, Ret, Node)

        ! Call MPI_ALLREDUCE with the operation iType on the array v (of
        ! length iLen), returning the results on ALL processors in the
        ! array Ret.
        !
        ! In:  v - Data to reduce
        !      iLen   - Number of elements in v and Ret
        !      iType  - Reduction operation to perform
        ! Out: Ret    - Reduced data

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer, intent(in) :: iLen
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, ierr
        character(*), parameter :: t_r = 'MPIAllReduce'
        ! Error handling vars
        character(255) :: string
        integer(MPIArg) :: length, jerr, errorclass
#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) "V -- > R AAAAAAAA", v, ret
            write(stdout,*) 'F name arr_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call mpi_error_class(ierr, errorclass, jerr)
            call mpi_error_string(errorclass,string, length, jerr)
            call stop_all (t_r, string)
        end if
#else
        ret = v
#endif

    end subroutine
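
    ! A minimal usage sketch: unlike MPIReduce, the reduced data is returned
    ! on every process. The array names and the use of MPI_MIN are
    ! illustrative only.
    !
    !    integer(int32) :: nLocal(2), nGlobal(2)
    !    call MPIAllReduce (nLocal, 2, MPI_MIN, nGlobal)
    !    ! every rank now sees the same nGlobal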



    subroutine MPIAllReduce_auto_arr_int (v, iType, Ret, Node)

        ! The same as MPIAllReduce_len, but the length of the array v (and
        ! thus Ret) is determined automagically

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: iType
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, ierr, aerr
        integer(MPIArg) :: msglen
        character(LEN=MPI_MAX_ERROR_STRING) :: errmsg
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                itype, comm, ierr)

        if (ierr /= MPI_SUCCESS) then
            write(stdout,*) 'F name arr_int'
            write(stdout,*) 'F type MPI_INTEGER4'
            write(stdout,*) 'F type2 integer(int32)'
            write(stdout,*) 'Op', itype
            write(stdout,*) 'ierr', ierr
            call MPI_ERROR_STRING(ierr,errmsg,msglen,aerr)
            write(stdout,*) errmsg
            call stop_all (t_r, 'MPI Error. Terminating.')
        end if
#else
        ret = v
#endif

    end subroutine



    Subroutine MPIAllReduceDatatype_arr_int(v, iLen, iType, dtype, Ret, Node)

        ! Perform MPI_Allreduce, specifying the datatype in the call.
        ! This is required for special datatypes, e.g. MPI_2INTEGER.
        !
        ! In:  v     - Input array to reduce
        !      iLen  - Number of elements in v, Ret
        !      iType - Operation to perform (e.g. MPI_MAX)
        !      dtype - Data type to pass to MPI (e.g. MPI_2INTEGER)
        !
        ! Out: Ret   - Reduced array.

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer(MPIArg), intent(in) :: dtype, itype
        integer, intent(in) :: ilen
        type(CommI), intent(in),optional :: Node
        integer(MPIArg) :: ierr, comm
        character(*), parameter :: t_r = 'MPIAllReduce'

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allreduce (val_in, val_out, int(iLen, MPIArg), dtype, itype, &
                            comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#else
        ret = v
#endif

    end subroutine
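
    ! A minimal usage sketch: paired datatypes such as MPI_2INTEGER are used
    ! with MPI_MAXLOC/MPI_MINLOC, where each element is a (value, rank) pair.
    ! The names are illustrative, and whether MPI_2INTEGER matches
    ! integer(int32) depends on the build's default integer kind.
    !
    !    integer(int32) :: pair(2), best(2)
    !    pair(1) = nLocalValue            ! the value to compare
    !    pair(2) = int(iProcIndex, int32) ! the index carried with it
    !    call MPIAllReduceDatatype (pair, 1, MPI_MAXLOC, MPI_2INTEGER, best)
    !    ! best(1) is the global maximum, best(2) the rank that owned it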



    subroutine MPISumAll_len_arr_int (v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result in Ret on all
        ! of the processors
        !
        ! In:  v   - Array of data to contribute to the sum
        !      iLen     - Number of data elements in v
        !
        ! Out: Ret      - An array of the same size as v to contain the
        !                 summed v

        integer(int32), intent(in) :: v(:)
        integer(int32), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine
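
    ! A minimal usage sketch: MPISumAll is MPIAllReduce with MPI_SUM, so the
    ! summed result appears on every process. Names are illustrative only.
    !
    !    integer(int32) :: nWalkers(3), nTotal(3)
    !    call MPISumAll (nWalkers, 3, nTotal)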



    subroutine MPISumAll_auto_arr_int (v, Ret, Node)

        ! The same as MPISumAll_len, but the length of v is determined
        ! automagically

        integer(int32), intent(in) :: v(:)
        integer(int32), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIAllReduce (v, MPI_SUM, Ret, Node)

    end subroutine



!    subroutine MPISumAll_inplace (v, Node)
!
!        ! The same as MPISumAll, but returning the results destructively
!        ! in v
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in),optional :: Node
!
!        call MPIAllReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPISum_len_arr_int(v, iLen, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node (or the node specified).
        !
        ! In:  v  - Array of data to contribute to the sum
        !      iLen    - Number of data elements in v
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:)
        integer(int32), intent(out) :: Ret(:)
        integer, intent(in) :: iLen
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, iLen, MPI_SUM, Ret, Node)

    end subroutine



    subroutine MPISum_auto_arr_int(v, Ret, Node)

        ! Sum data on different processors, leaving the result only on the
        ! root node (or the node specified). The length is determined
        ! automatically, so it does not need to be specified.
        !
        ! In:  v  - Array of data to contribute to the sum
        !      Node    - The node to leave the final values on.
        ! Out: Ret     - An array of the same size as v to contain the
        !                summed v.

        integer(int32), intent(in)  :: v(:)
        integer(int32), intent(out) :: Ret(:)
        type(CommI), intent(in), optional :: Node

        call MPIReduce (v, MPI_SUM, Ret, Node)

    end subroutine
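
    ! A minimal usage sketch: MPISum also reduces with MPI_SUM, but only the
    ! root (or the optional Node) receives the result. Names are
    ! illustrative only.
    !
    !    integer(int32) :: counts(2), totals(2)
    !    call MPISum (counts, totals)     ! totals is valid on root only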



!    subroutine MPISum_inplace (v, Node)
!
!        %%%%(type1)s, intent(inout) :: v()
!        type(CommI), intent(in), optional :: Node
!
!        call MPIReduce_inplace (v, MPI_SUM, Node)
!
!    end subroutine



    subroutine MPIBCast_lenroot_arr_int (v, iLen, rt)

        ! Call MPI_BCAST to broadcast the value(s) in array v on processor
        ! rt (the root) to all processors, where the number of elements in
        ! array v is specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        !        rt     - The root processor to broadcast from
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:)
        integer, intent(in) :: iLen, rt
        integer(MPIArg) :: ierr
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call MPI_Bcast (val_in, int(iLen, MPIArg), &
                MPI_INTEGER4, &
                int(rt, MPIArg), CommGlobal, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine
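
    ! A minimal usage sketch of the length+root form: rank 0 sends iLen
    ! elements and every other rank receives them in place. Names are
    ! illustrative only.
    !
    !    integer(int32) :: params(5)
    !    call MPIBCast (params, 5, 0)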



    subroutine MPIBCast_len_arr_int (v, iLen, Node)

        ! Call MPI_BCAST to broadcast the value(s) in array v from the root
        ! processor (obtained via GetComm) to all processors, where the
        ! number of elements in array v is specified by iLen.
        !
        ! In:    iLen   - The number of elements in v
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:)
        integer, intent(in) :: iLen
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, int(ilen, MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')

#endif
    end subroutine



    subroutine MPIBCast_auto_arr_int (v, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically. The root processor is obtained via
        ! GetComm.
        !
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:)
        integer(MPIArg) :: ierr, comm, rt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBcast'

#ifdef USE_MPI
        call GetComm (Comm, Node, rt)

        call MPI_Bcast (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                rt, comm, ierr)

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine



    subroutine MPIBCast_logic_arr_int (v, tMe, Node)

        ! The same as MPIBcast_len, but the number of elements in v is
        ! determined automagically
        !
        ! In: tMe - Set to be true by the processor which is sending the info
        ! InOut: v - The data to broadcast, and the returned v

        integer(int32), intent(inout), target :: v(:)
        logical, intent(in) :: tMe
        integer(MPIArg) :: ierr, comm, rt, nrt
        type(CommI), intent(in), optional :: Node
        character(*), parameter :: t_r = 'MPIBCast'

#ifdef USE_MPI
        call GetComm(Comm, Node, rt, tMe)

        ! Which processor is root?
        call MPIAllreducert (rt, nrt, comm, ierr)

        if (ierr == MPI_SUCCESS) then
            call MPI_Bcast (val_in, &
                    int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                    MPI_INTEGER4, &
                    nrt, comm, ierr)
        end if

        if (ierr /= MPI_SUCCESS) &
            call stop_all (t_r, 'MPI Error. Terminating.')
#endif
    end subroutine
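
    ! A minimal usage sketch of the logical form: every rank makes the same
    ! call, and exactly one rank passes tMe = .true.; the wrappers then work
    ! out the corresponding root and broadcast from it. The names iProcIndex
    ! and iOwnerRank are illustrative assumptions.
    !
    !    integer(int32) :: seed(1)
    !    call MPIBCast (seed, iProcIndex == iOwnerRank)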



    subroutine MPIAlltoAll_arr_int (v, SendSize, ret, RecvSize, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(out), target :: Ret(:)
        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Alltoall (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret = v
        ierr = 0
#endif

    end subroutine



    subroutine MPIAlltoAllV_arr_int (v, SendSizes, SendOffsets, Ret, &
                             RecvSizes, RecvOffsets, ierr, Node)

        integer(MPIArg), intent(in) :: SendSizes(:), SendOffsets(:)
        integer(MPIArg), intent(in) :: RecvSizes(:), RecvOffsets(:)
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret(:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)
        call MPI_Alltoallv (val_in, SendSizes, &
                SendOffsets, &
                MPI_INTEGER4, &
                val_out, &
                RecvSizes, &
                RecvOffsets, &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(:) = v(1:size(Ret,1))
        ierr = 0
#endif
    end subroutine



    subroutine MPIAllGather_len_arr_int (v, SendSize, Ret, RecvSize, ierr, &
                                 Node)

        integer, intent(in) :: SendSize, RecvSize
        integer, intent(out) :: ierr
        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: Ret(:,:)
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_AllGather (val_in, int(SendSize, MPIArg), &
                MPI_INTEGER4, &
                val_out, int(RecvSize, MPIArg), &
                MPI_INTEGER4, &
                comm, err)
        ierr = err
#else
        Ret(:,1) = v
        ierr = 0
#endif
    end subroutine



    ! v is the Send Buffer
    ! ret is the Receive Buffer
    Subroutine MPIAllGather_auto_arr_int(v, ret, ierr, Node)

        integer(int32), intent(in), target :: v(:)
        integer(int32), intent(inout), target :: ret(:,:)
        integer, intent(out) :: ierr
        type(CommI), intent(in), optional :: Node
        integer(MPIArg) :: Comm, err

#ifdef USE_MPI
        call GetComm (Comm, Node)

        call MPI_Allgather (val_in, &
                int((ubound(v,1)-lbound(v,1)+1), MPIArg), &
                MPI_INTEGER4, &
                val_out, &
                int((ubound(ret,1)-lbound(ret,1)+1), MPIArg), &
                MPI_