write_walkers Subroutine

private subroutine write_walkers(parent, MaxEx)

Arguments

Type IntentOptional Attributes Name
integer(kind=hid_t), intent(in) :: parent
integer, intent(in), optional :: MaxEx

Contents

Source Code


Source Code

    subroutine write_walkers(parent, MaxEx)

        use FciMCData, only: CurrentDets, &
                             TotWalkers, iLutHF
        use DetBitOps, only: FindBitExcitLevel, tAccumEmptyDet
        use global_det_data, only: writeFValsAsInt, writeAPValsAsInt

        ! Output the wavefunction information to the relevant groups in the
        ! wavefunction.

        integer(hid_t), intent(in) :: parent
        integer, intent(in), optional :: MaxEx

        character(*), parameter :: t_r = 'write_walkers'

        integer(hid_t) :: wfn_grp_id
        integer(hdf_err) :: err

        integer(hsize_t) :: counts(0:nProcessors - 1)
        integer(hsize_t) :: all_count

        integer(int32) :: bit_rep_width
        integer(hsize_t) :: write_offset(2)
        real(dp) :: all_parts(lenof_sign), all_norm_sqr(lenof_sign)
        integer :: ierr
        integer(n_int), allocatable :: gdata_buf(:, :)

        integer :: ExcitLevel
        integer(hsize_t) :: printed_count
        integer(kind=n_int), allocatable, target :: TmpVecDets(:, :)
        integer(kind=n_int), pointer :: PrintedDets(:, :)
        integer :: i
        real(dp) :: CurrentSign(lenof_sign), printed_tot_parts(lenof_sign), &
                    printed_norm_sqr(inum_runs)
        type(gdata_io_t) :: gdata_write_handler
        integer :: gdata_size
#ifdef CMPLX_
        integer :: run
#endif


        ! We do not want to print all dets. At least empty dets should be skipped
        ! Let us find out which ones to be printed and copy them
        allocate(TmpVecDets(0:NIfTot, TotWalkers))
        PrintedDets => TmpVecDets
        printed_count = 0
        printed_norm_sqr = 0
        printed_tot_parts = 0

        call gdata_write_handler%init_gdata_io(tAutoAdaptiveShift, &
                                               tScaleBlooms, tAccumPopsActive, fvals_size, max_ratio_size, lenof_sign + 1)
        gdata_size = gdata_write_handler%entry_size()
        if (gdata_size > 0) allocate(gdata_buf(gdata_size, TotWalkers))

        do i = 1, int(TotWalkers)
            call extract_sign(CurrentDets(:, i), CurrentSign)
            ! Skip empty determinants (unless we are accumulating its population)
            if (IsUnoccDet(CurrentSign) .and. .not. tAccumEmptyDet(CurrentDets(:, i))) cycle

            ExcitLevel = FindBitExcitLevel(iLutHF, CurrentDets(:, i))

            ! If we are printing truncated popsfiles, skip over large excitations
            if (present(MaxEx) .and. ExcitLevel > MaxEx) cycle

            ! If we are reducing the size of popsfiles, skip dets with low population and high excitations
            if (tReduceHDF5Pops) then
                if (all(abs(CurrentSign) <= HDF5PopsMin) .and. ExcitLevel > iHDF5PopsMinEx) cycle
            end if

            printed_count = printed_count + 1

            !Fill in det
            PrintedDets(:, printed_count) = CurrentDets(:, i)

            !Fill in global data
            if (gdata_size > 0) then
                ! get the statistics of THIS processor
                call gdata_write_handler%write_gdata_hdf5(gdata_buf(:, printed_count), i)
            end if

            ! Fill in stats
            printed_tot_parts = printed_tot_parts + abs(CurrentSign)
#ifdef CMPLX_
            do run = 1, inum_runs
                printed_norm_sqr(run) = printed_norm_sqr(run) + &
                                        sum(CurrentSign(min_part_type(run):max_part_type(run))**2)
            end do
#else
            printed_norm_sqr = printed_norm_sqr + CurrentSign**2
#endif
        end do

        ! TODO: Add a (slower) fallback routine for weird cases, odd HDF libs

        ! Firstly create the group for storing wavefunction info
        call h5gcreate_f(parent, nm_wfn_grp, wfn_grp_id, err)

        ! TODO: Refactor these chunks into their own little subroutines.
        ! We fix the format of the binary file. Thus if we are on a 32-bit
        ! build, we need to convert the data into 64-bit compatible chunks.
        if (build_64bit) then
            bit_rep_width = NIfD + 1
        else
            bit_rep_width = 2 * (NIfD + 1)
            call stop_all(t_r, "Needs manual, careful, testing")
        end if

        ! How many occupied determinants are there on each of the processors
        call MPIAllGather(printed_count, counts, ierr)
        all_count = sum(counts)
        write_offset = [0_hsize_t, sum(counts(0:iProcIndex - 1))]

        ! Output the bit-representation data
        call write_int32_attribute(wfn_grp_id, nm_rep_width, bit_rep_width)
        call write_int32_attribute(wfn_grp_id, nm_sgn_len, &
                                   int(lenof_sign, int32))
        call write_int64_attribute(wfn_grp_id, nm_num_dets, all_count)
        ! TODO: Check these values. May need to sum them explicitly

        ! denote if auto-adaptive shift was used
        call write_log_scalar(wfn_grp_id, nm_tauto, tAutoAdaptiveShift)
        call write_log_scalar(wfn_grp_id, nm_sc_blooms, tScaleBlooms)

        ! denote if accum-pos will be written
        call write_log_scalar(wfn_grp_id, nm_tAP, tAccumPopsActive)
        if (tAccumPopsActive) then
            !Write the number of iterations, during which the populations have
            !been accumulated
            call write_int64_scalar(wfn_grp_id, nm_accum_counter, iAccumPopsCounter)
        end if

        ! Accumulated values only valid on head node. collate_iter_data
        ! has not yet run.
        call MPISumAll(printed_norm_sqr, all_norm_sqr)
        call MPISumAll(printed_tot_parts, all_parts)
        call write_dp_1d_attribute(wfn_grp_id, nm_norm_sqr, all_norm_sqr)
        call write_dp_1d_attribute(wfn_grp_id, nm_num_parts, all_parts)

        !we do an explicitly buffered write to avoid performance problems with
        !complicated hyperslabs + collective buffering
        ! Write out the determinant bit-representations
        call write_2d_multi_arr_chunk_buff( &
            wfn_grp_id, nm_ilut, h5kind_to_type(int64, H5_INTEGER_KIND), &
            PrintedDets, &
            [int(nifd + 1, hsize_t), int(printed_count, hsize_t)], & ! dims
            [0_hsize_t, 0_hsize_t], & ! offset
            [int(nifd + 1, hsize_t), all_count], & ! all dims
            [0_hsize_t, sum(counts(0:iProcIndex - 1))] & ! output offset
            )

        call write_2d_multi_arr_chunk_buff( &
            wfn_grp_id, nm_sgns, h5kind_to_type(dp, H5_REAL_KIND), &
            PrintedDets, &
            [int(lenof_sign, hsize_t), int(printed_count, hsize_t)], & ! dims
            [int(IlutBits%ind_pop, hsize_t), 0_hsize_t], & ! offset
            [int(lenof_sign, hsize_t), all_count], & ! all dims
            [0_hsize_t, sum(counts(0:iProcIndex - 1))] & ! output offset
            )

        deallocate(TmpVecDets)

        if (gdata_size > 0) then
            call write_2d_multi_arr_chunk_buff( &
                wfn_grp_id, nm_gdata, h5kind_to_type(dp, H5_REAL_KIND), gdata_buf, &
                [int(gdata_size, hsize_t), int(printed_count, hsize_t)], & ! dims
                [0_hsize_t, 0_hsize_t], &
                [int(gdata_size, hsize_t), all_count], &
                [0_hsize_t, sum(counts(0:iProcIndex - 1))] &
                )
            deallocate(gdata_buf)
        end if

        ! And we are done
        call h5gclose_f(wfn_grp_id, err)

    end subroutine write_walkers