subroutine determ_projection()
! This subroutine gathers together partial_determ_vecs from each processor so
! that the full vector for the whole deterministic space is stored on each processor.
! It then performs the deterministic multiplication of the projector on this full vector.
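! In effect, for each core-space determinant this computes
!     partial_determ_vecs <- tau * (shift - H_core) * full_determ_vecs,
! i.e. the deterministic part of the projection, with the contribution for a
! skipped reference determinant zeroed at the end.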
use FciMCData, only: SemiStoch_Comms_Time
use FciMCData, only: SemiStoch_Multiply_Time
use Parallel_neci, only: MPIBarrier, MPIAllGatherV
use DetBitOps, only: DetBitEQ
integer :: run
integer :: i, j, part_type, c_run
integer :: ierr
integer(MPIArg) :: MPIerr
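! scaledDiagSft holds the per-run shift, optionally rescaled for core-space
! determinants when the adaptive shift is in use.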
real(dp) :: scaledDiagSft(inum_runs)
do run = 1, size(cs_replicas)
associate(rep => cs_replicas(run))
call MPIBarrier(ierr)
call set_timer(SemiStoch_Comms_Time)
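! Gather every processor's block of the deterministic vector; determ_sizes and
! determ_displs give the length and offset of each processor's block within
! full_determ_vecs.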
call MPIAllGatherV(rep%partial_determ_vecs, rep%full_determ_vecs, &
rep%determ_sizes, rep%determ_displs)
call halt_timer(SemiStoch_Comms_Time)
call set_timer(SemiStoch_Multiply_Time)
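! Only processors that own at least one core-space determinant have rows of
! the core Hamiltonian to apply.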
if (rep%determ_sizes(iProcIndex) >= 1) then
! For the moment we only add in these contributions when the energy is needed.
! This will need refinement if we want to continue with the option of
! instantaneous vs. true full RDMs (as in another CMO branch).
! Perform the multiplication.
rep%partial_determ_vecs = 0.0_dp
#ifdef CMPLX_
block
integer :: r_pt, i_pt
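! Coefficients are stored as interleaved (real, imaginary) pairs, so step
! through the part types two at a time and expand the complex product
! H_ij * c_j by hand from the real and imaginary parts of each element.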
do i = 1, rep%determ_sizes(iProcIndex)
do j = 1, rep%sparse_core_ham(i)%num_elements
do r_pt = rep%min_part(), rep%max_part(), 2
i_pt = r_pt + 1
rep%partial_determ_vecs(r_pt, i) = rep%partial_determ_vecs(r_pt, i) &
- Real(rep%sparse_core_ham(i)%elements(j)) &
* rep%full_determ_vecs(r_pt, rep%sparse_core_ham(i)%positions(j)) &
+ Aimag(rep%sparse_core_ham(i)%elements(j)) &
* rep%full_determ_vecs(i_pt, rep%sparse_core_ham(i)%positions(j))
rep%partial_determ_vecs(i_pt, i) = rep%partial_determ_vecs(i_pt, i) &
- Aimag(rep%sparse_core_ham(i)%elements(j)) &
* rep%full_determ_vecs(r_pt, rep%sparse_core_ham(i)%positions(j)) &
- Real(rep%sparse_core_ham(i)%elements(j)) &
* rep%full_determ_vecs(i_pt, rep%sparse_core_ham(i)%positions(j))
end do
end do
end do
end block
#else
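! Real case: accumulate -H_core * full_determ_vecs row by row over the
! non-zero elements of the sparse core Hamiltonian.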
do i = 1, rep%determ_sizes(iProcIndex)
do j = 1, rep%sparse_core_ham(i)%num_elements
rep%partial_determ_vecs(:, i) = rep%partial_determ_vecs(:, i) &
- rep%sparse_core_ham(i)%elements(j) &
* rep%full_determ_vecs(:, rep%sparse_core_ham(i)%positions(j))
end do
end do
#endif
! Now add shift*full_determ_vecs to account for the diagonal shift, which is
! not stored in sparse_core_ham.
#ifdef CMPLX_
do i = 1, rep%determ_sizes(iProcIndex)
! Only scale the shift for the corespace when the option is set
if (tCoreAdaptiveShift .and. tAdaptiveShift) then
! Scale the shift using the modulus sqrt(Re**2 + Im**2) of this run's
! complex coefficient.
scaledDiagSft(run) = &
shiftFactorFunction( &
rep%indices_of_determ_states(i), run, &
sqrt(rep%full_determ_vecs(&
min_pt, i + rep%determ_displs(iProcIndex))**2 + &
rep%full_determ_vecs(&
max_pt, i + rep%determ_displs(iProcIndex))**2)) &
* DiagSft(run)
else
scaledDiagSft = DiagSft
end if
do part_type = 1, size(rep%partial_determ_vecs, dim=1)
! Convert the index along the first dimension of partial_determ_vecs into an
! absolute part type and apply the shift of the run it belongs to.
rep%partial_determ_vecs(part_type, i) = &
rep%partial_determ_vecs(part_type, i) + &
scaledDiagSft(part_type_to_run(rep%min_part() + part_type - 1)) &
* rep%full_determ_vecs(part_type, i + rep%determ_displs(iProcIndex))
end do
end do
#else
do i = 1, rep%determ_sizes(iProcIndex)
! Only scale the shift for the corespace when the option is set
if (tCoreAdaptiveShift .and. tAdaptiveShift) then
! Here, translate between positions in full_determ_vecs and replicas:
! a global core space holds an entry per run, otherwise only a single one.
if (rep%t_global) then
c_run = run
else
c_run = 1
end if
! get the re-scaled shift accounting for undersampling error
scaledDiagSft(run) = DiagSft(run) * shiftFactorFunction( &
rep%indices_of_determ_states(i), run, &
abs(rep%full_determ_vecs(c_run, i + rep%determ_displs(iProcIndex))))
else
scaledDiagSft = DiagSft
end if
rep%partial_determ_vecs(:, i) = rep%partial_determ_vecs(:, i) &
+ scaledDiagSft(run) * rep%full_determ_vecs(:, i + rep%determ_displs(iProcIndex))
end do
#endif
! Now multiply the vector by tau to get the final projected vector.
rep%partial_determ_vecs = rep%partial_determ_vecs * tau
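! If this run's reference determinant is being skipped (tSkipRef), zero its
! deterministic contribution so it is left untouched by the projection.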
do i = 1, rep%determ_sizes(iProcIndex)
if (tSkipRef(run) .and. DetBitEQ(CurrentDets(:, rep%indices_of_determ_states(i)), &
iLutRef(:, run), nIfD)) then
rep%partial_determ_vecs(:, i) = 0.0_dp
end if
end do
end if
end associate
call halt_timer(SemiStoch_Multiply_Time)
end do
end subroutine determ_projection