subroutine write_walkers(parent, MaxEx)
use FciMCData, only: CurrentDets, &
TotWalkers, iLutHF
use DetBitOps, only: FindBitExcitLevel, tAccumEmptyDet
use global_det_data, only: writeFValsAsInt, writeAPValsAsInt
! Output the wavefunction information to the relevant groups in the
! wavefunction.
integer(hid_t), intent(in) :: parent
integer, intent(in), optional :: MaxEx
character(*), parameter :: t_r = 'write_walkers'
integer(hid_t) :: wfn_grp_id
integer(hdf_err) :: err
integer(hsize_t) :: counts(0:nProcessors - 1)
integer(hsize_t) :: all_count
integer(int32) :: bit_rep_width
integer(hsize_t) :: write_offset(2)
real(dp) :: all_parts(lenof_sign), all_norm_sqr(lenof_sign)
integer :: ierr
integer(n_int), allocatable :: gdata_buf(:, :)
integer :: ExcitLevel
integer(hsize_t) :: printed_count
integer(kind=n_int), allocatable, target :: TmpVecDets(:, :)
integer(kind=n_int), pointer :: PrintedDets(:, :)
integer :: i
real(dp) :: CurrentSign(lenof_sign), printed_tot_parts(lenof_sign), &
printed_norm_sqr(inum_runs)
type(gdata_io_t) :: gdata_write_handler
integer :: gdata_size
#ifdef CMPLX_
integer :: run
#endif
! We do not want to print all dets. At least empty dets should be skipped
! Let us find out which ones to be printed and copy them
allocate(TmpVecDets(0:NIfTot, TotWalkers))
PrintedDets => TmpVecDets
printed_count = 0
printed_norm_sqr = 0
printed_tot_parts = 0
call gdata_write_handler%init_gdata_io(tAutoAdaptiveShift, &
tScaleBlooms, tAccumPopsActive, fvals_size, max_ratio_size, lenof_sign + 1)
gdata_size = gdata_write_handler%entry_size()
if (gdata_size > 0) allocate(gdata_buf(gdata_size, TotWalkers))
do i = 1, int(TotWalkers)
call extract_sign(CurrentDets(:, i), CurrentSign)
! Skip empty determinants (unless we are accumulating its population)
if (IsUnoccDet(CurrentSign) .and. .not. tAccumEmptyDet(CurrentDets(:, i))) cycle
ExcitLevel = FindBitExcitLevel(iLutHF, CurrentDets(:, i))
! If we are printing truncated popsfiles, skip over large excitations
if (present(MaxEx) .and. ExcitLevel > MaxEx) cycle
! If we are reducing the size of popsfiles, skip dets with low population and high excitations
if (tReduceHDF5Pops) then
if (all(abs(CurrentSign) <= HDF5PopsMin) .and. ExcitLevel > iHDF5PopsMinEx) cycle
end if
printed_count = printed_count + 1
!Fill in det
PrintedDets(:, printed_count) = CurrentDets(:, i)
!Fill in global data
if (gdata_size > 0) then
! get the statistics of THIS processor
call gdata_write_handler%write_gdata_hdf5(gdata_buf(:, printed_count), i)
end if
! Fill in stats
printed_tot_parts = printed_tot_parts + abs(CurrentSign)
#ifdef CMPLX_
do run = 1, inum_runs
printed_norm_sqr(run) = printed_norm_sqr(run) + &
sum(CurrentSign(min_part_type(run):max_part_type(run))**2)
end do
#else
printed_norm_sqr = printed_norm_sqr + CurrentSign**2
#endif
end do
! TODO: Add a (slower) fallback routine for weird cases, odd HDF libs
! Firstly create the group for storing wavefunction info
call h5gcreate_f(parent, nm_wfn_grp, wfn_grp_id, err)
! TODO: Refactor these chunks into their own little subroutines.
! We fix the format of the binary file. Thus if we are on a 32-bit
! build, we need to convert the data into 64-bit compatible chunks.
if (build_64bit) then
bit_rep_width = NIfD + 1
else
bit_rep_width = 2 * (NIfD + 1)
call stop_all(t_r, "Needs manual, careful, testing")
end if
! How many occupied determinants are there on each of the processors
call MPIAllGather(printed_count, counts, ierr)
all_count = sum(counts)
write_offset = [0_hsize_t, sum(counts(0:iProcIndex - 1))]
! Output the bit-representation data
call write_int32_attribute(wfn_grp_id, nm_rep_width, bit_rep_width)
call write_int32_attribute(wfn_grp_id, nm_sgn_len, &
int(lenof_sign, int32))
call write_int64_attribute(wfn_grp_id, nm_num_dets, all_count)
! TODO: Check these values. May need to sum them explicitly
! denote if auto-adaptive shift was used
call write_log_scalar(wfn_grp_id, nm_tauto, tAutoAdaptiveShift)
call write_log_scalar(wfn_grp_id, nm_sc_blooms, tScaleBlooms)
! denote if accum-pos will be written
call write_log_scalar(wfn_grp_id, nm_tAP, tAccumPopsActive)
if (tAccumPopsActive) then
!Write the number of iterations, during which the populations have
!been accumulated
call write_int64_scalar(wfn_grp_id, nm_accum_counter, iAccumPopsCounter)
end if
! Accumulated values only valid on head node. collate_iter_data
! has not yet run.
call MPISumAll(printed_norm_sqr, all_norm_sqr)
call MPISumAll(printed_tot_parts, all_parts)
call write_dp_1d_attribute(wfn_grp_id, nm_norm_sqr, all_norm_sqr)
call write_dp_1d_attribute(wfn_grp_id, nm_num_parts, all_parts)
!we do an explicitly buffered write to avoid performance problems with
!complicated hyperslabs + collective buffering
! Write out the determinant bit-representations
call write_2d_multi_arr_chunk_buff( &
wfn_grp_id, nm_ilut, h5kind_to_type(int64, H5_INTEGER_KIND), &
PrintedDets, &
[int(nifd + 1, hsize_t), int(printed_count, hsize_t)], & ! dims
[0_hsize_t, 0_hsize_t], & ! offset
[int(nifd + 1, hsize_t), all_count], & ! all dims
[0_hsize_t, sum(counts(0:iProcIndex - 1))] & ! output offset
)
call write_2d_multi_arr_chunk_buff( &
wfn_grp_id, nm_sgns, h5kind_to_type(dp, H5_REAL_KIND), &
PrintedDets, &
[int(lenof_sign, hsize_t), int(printed_count, hsize_t)], & ! dims
[int(IlutBits%ind_pop, hsize_t), 0_hsize_t], & ! offset
[int(lenof_sign, hsize_t), all_count], & ! all dims
[0_hsize_t, sum(counts(0:iProcIndex - 1))] & ! output offset
)
deallocate(TmpVecDets)
if (gdata_size > 0) then
call write_2d_multi_arr_chunk_buff( &
wfn_grp_id, nm_gdata, h5kind_to_type(dp, H5_REAL_KIND), gdata_buf, &
[int(gdata_size, hsize_t), int(printed_count, hsize_t)], & ! dims
[0_hsize_t, 0_hsize_t], &
[int(gdata_size, hsize_t), all_count], &
[0_hsize_t, sum(counts(0:iProcIndex - 1))] &
)
deallocate(gdata_buf)
end if
! And we are done
call h5gclose_f(wfn_grp_id, err)
end subroutine write_walkers