store_whole_core_space Subroutine

public subroutine store_whole_core_space(rep)

Arguments

Type                  Intent  Optional  Attributes  Name
type(core_space_t)    inout                         rep

Source Code

    subroutine store_whole_core_space(rep)

        use FciMCData, only: CoreSpaceTag
        use MemoryManager, only: LogMemAlloc
        use Parallel_neci, only: MPIAllGatherV, iProcIndex_inter, iProcIndex_intra, &
                                 mpi_comm_inter, mpi_comm_intra, nNodes, NodeLengths, &
                                 iNodeIndex, MPIAllGather
        type(core_space_t), intent(inout) :: rep
        integer(MPIArg) :: MPIerr
        integer :: ierr
        character(len=*), parameter :: t_r = "store_whole_core_space"
        integer(MPIArg), allocatable :: sizes_this_node(:)
        integer(MPIArg) :: total_size_this_node
        integer(MPIArg), allocatable :: node_offsets(:), sizes_per_node(:)
        integer(MPIArg) :: proc_offset
        integer(MPIArg) :: core_width
        integer :: i
        integer(MPIArg) :: node_size, num_nodes
        integer(MPIArg) :: global_offset

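        ! Determine how many processes run on this node and how many nodes there are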
        call mpi_comm_size(mpi_comm_intra, node_size, MPIerr)
        call mpi_comm_size(mpi_comm_inter, num_nodes, MPIerr)

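        ! Work arrays: core-space size of each process on this node, and the
        ! offset and total core-space size of each node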
        allocate(sizes_this_node(0:node_size - 1))
        allocate(node_offsets(0:num_nodes - 1))
        allocate(sizes_per_node(0:num_nodes - 1))

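        ! Allocate storage for the full core space in an MPI shared-memory window,
        ! so it is stored only once per node; each column holds one determinant
        ! (1+NIfTot integers)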
        call shared_allocate_mpi(rep%core_space_win, rep%core_space_direct, &
                                 [int(1 + NIfTot, int64), int(rep%determ_space_size, int64)], ierr)
        ! Convert from 1-based first dimension to 0-based first dimension as used in iluts
        rep%core_space(0:, 1:) => rep%core_space_direct(1:, 1:)
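        ! Register the allocation with the memory manager; only the intra-node
        ! root zeroes the shared array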
        call LogMemAlloc('core_space', maxval(rep%determ_sizes) * (NIfTot + 1), 8, t_r, &
                         rep%CoreSpaceTag, err=ierr)
        if (iProcIndex_intra == 0) rep%core_space = 0_n_int

        ! Gather the core-space size of every process on this node
        call MPI_AllGather(rep%determ_sizes(iProcIndex), 1, MPI_INTEGER, &
                           sizes_this_node, 1, MPI_INTEGER, mpi_comm_intra, MPIerr)

        ! Get the intra-node offset
        proc_offset = 0
        do i = 1, iProcIndex_intra
            proc_offset = proc_offset + sizes_this_node(i - 1)
        end do

        ! Sum the size on this node
        total_size_this_node = sum(sizes_this_node)

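        ! Gather the total core-space size of each node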
        call MPI_AllGather(total_size_this_node, 1, MPI_INTEGER, sizes_per_node, 1, MPI_INTEGER, &
                           mpi_comm_inter, MPIerr)

        ! Get the inter-node offset
        node_offsets(0) = 0
        do i = 1, num_nodes - 1
            node_offsets(i) = node_offsets(i - 1) + sizes_per_node(i - 1)
        end do

        global_offset = node_offsets(iProcIndex_inter) + proc_offset

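        ! Each process copies its own core-space determinants from SpawnedParts
        ! into its slot of the shared array; the window syncs and barriers keep
        ! the shared memory consistent across the node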
        call MPI_Win_Sync(rep%core_space_win, MPIerr)
        call MPI_Barrier(mpi_comm_intra, MPIerr)
        rep%core_space(0:NIfTot, (global_offset + 1): &
                       (global_offset + rep%determ_sizes(iProcIndex))) = &
            SpawnedParts(0:NIfTot, 1:rep%determ_sizes(iProcIndex))
        call MPI_Win_Sync(rep%core_space_win, MPIerr)
        call MPI_Barrier(mpi_comm_intra, MPIerr)
        call MPI_Win_Sync(rep%core_space_win, MPIerr)

        ! Multiply by the message width (1+NIfTot) so that sizes and offsets
        ! count integers rather than determinants, as required by MPI_AllGatherV
        core_width = int(size(rep%core_space, dim=1), MPIArg)
        node_offsets = node_offsets * core_width
        total_size_this_node = total_size_this_node * core_width
        sizes_per_node = sizes_per_node * core_width

        ! Give explicit limits for the SpawnedParts slice, as NIfTot is not
        ! necessarily equal to NIfBCast (it may be longer)
        call MPI_AllGatherV(MPI_IN_PLACE, total_size_this_node, MPI_INTEGER8, rep%core_space, &
                            sizes_per_node, node_offsets, MPI_INTEGER8, mpi_comm_inter, MPIerr)

        ! And sync the shared window
        call MPI_Win_Sync(rep%core_space_win, MPIerr)
        call MPI_Barrier(mpi_comm_intra, MPIerr)
        ! Communicate the index in the full vector at which each processor's
        ! section starts, relative to the first element (i.e. the displs array
        ! used in MPI routines)
        call MPIAllGather(global_offset, rep%determ_displs, ierr)

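        ! Tidy up the temporary communication arrays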
        deallocate(sizes_per_node)
        deallocate(node_offsets)
        deallocate(sizes_this_node)

    end subroutine store_whole_core_space
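
Example

A minimal calling sketch, not taken from the library source. It assumes that rep
has already been set up by the semi-stochastic initialisation (rep%determ_sizes
and rep%determ_space_size filled, rep%determ_displs allocated) and that
SpawnedParts holds the core-space determinants of this process; the module name
core_space_util is an assumption.

    use core_space_util, only: core_space_t  ! module name is an assumption
    type(core_space_t) :: rep

    ! ... semi-stochastic set-up fills rep%determ_sizes, rep%determ_space_size
    ! and SpawnedParts, and allocates rep%determ_displs ...

    call store_whole_core_space(rep)

    ! Afterwards rep%core_space holds the core-space determinants of every
    ! process, stored once per node in MPI shared memory.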