!-----------------------------------------------------------------------
!BOP
! !ROUTINE: par_xsum --- Calculate x-sum bit-wise consistently
!
! !INTERFACE:
!****6***0*********0*********0*********0*********0*********0**********72
subroutine par_xsum(grid, a, ltot, sum) 28,13
!****6***0*********0*********0*********0*********0*********0**********72
!
! !USES:
#if defined ( SPMD )
use parutilitiesmodule
, only : parexchangevector
#endif
use dynamics_vars
, only : T_FVDYCORE_GRID
use shr_kind_mod
, only: r8 => shr_kind_r8
use repro_sum_mod
, only : repro_sum, repro_sum_tol_exceeded, &
repro_sum_rel_diff_max, &
repro_sum_recompute
use cam_logfile
, only : iulog
use FVperf_module
, only : FVstartclock, FVstopclock
implicit none
! !INPUT PARAMETERS:
type (T_FVDYCORE_GRID), intent(in) :: grid
integer, intent(in) :: ltot ! number of quantities to be summed
real (r8) a(grid%ifirstxy:grid%ilastxy,ltot) ! input vector to be summed
! !OUTPUT PARAMETERS:
real (r8) sum(ltot) ! sum of all vector entries
! !DESCRIPTION:
! This subroutine calculates the sum of "a" in a reproducible
! (sequentialized) fashion which should give bit-wise identical
! results irrespective of the number of MPI processes.
!
! !CALLED FROM:
! te_map
!
! !REVISION HISTORY:
!
! AAM 00.11.01 : Created
! WS 03.10.22 : pmgrid removed (now spmd_dyn)
! WS 04.10.04 : added grid as an argument; removed spmd_dyn
! WS 05.05.25 : removed ifirst, ilast, im as arguments (in grid)
! PW 08.06.25 : added fixed point reproducible sum
!
!EOP
!---------------------------------------------------------------------
!BOC
! !Local
real(r8), parameter :: D0_0 = 0.0_r8
real(r8) :: rel_diff(2,ltot)
real(r8),allocatable :: l_a(:)
real(r8),allocatable :: a_tmp(:)
integer :: i,ipe,l,im,lim,nprxy_x,offset
integer :: sendcount(grid%nprxy_x)
integer :: recvcount(grid%nprxy_x)
logical :: write_warning
im = grid%im
lim = (grid%ilastxy-grid%ifirstxy) + 1
nprxy_x = grid%nprxy_x
offset = grid%ifirstxy - 1
call FVstartclock
(grid,'xsum_repro_sum')
call repro_sum
(a, sum, lim, lim, ltot, gbl_count=im, &
commid=grid%commxy_x, rel_diff=rel_diff)
call FVstopclock
(grid,'xsum_repro_sum')
! check that "fast" reproducible sum is accurate enough. If not, calculate
! using old method
write_warning = .false.
if (grid%myidxy_x == 0) write_warning = .true.
if ( repro_sum_tol_exceeded
('par_xsum', ltot, write_warning, &
rel_diff) ) then
if ( repro_sum_recompute ) then
call FVstartclock
(grid,'xsum_sumfix')
allocate( l_a(lim*nprxy_x) )
allocate( a_tmp(im) )
sendcount(:) = lim
do l=1,ltot
if (rel_diff(1,l) > repro_sum_rel_diff_max) then
sum(l) = D0_0
#if defined ( SPMD )
do ipe=1,nprxy_x
do i=1,lim
l_a(i+(ipe-1)*lim) = a(i+offset,l)
enddo
enddo
call parexchangevector
( grid%commxy_x, sendcount, l_a, &
recvcount, a_tmp )
do i=1,im
sum(l) = sum(l) + a_tmp(i)
enddo
#else
do i=1,im
sum(l) = sum(l) + a(i,l)
enddo
#endif
endif
enddo
deallocate( a_tmp )
deallocate( l_a )
call FVstopclock
(grid,'xsum_sumfix')
endif
endif
return
!EOC
end subroutine par_xsum
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: par_xsum_r4 --- Calculate x-sum bit-wise consistently (real4)
!
! !INTERFACE:
!****6***0*********0*********0*********0*********0*********0**********72
subroutine par_xsum_r4(grid, a, ltot, sum),13
!****6***0*********0*********0*********0*********0*********0**********72
!
! !USES:
#if defined ( SPMD )
use parutilitiesmodule
, only : parexchangevector
#endif
use dynamics_vars
, only : T_FVDYCORE_GRID
use shr_kind_mod
, only: r8 => shr_kind_r8, r4 => shr_kind_r4
use repro_sum_mod
, only : repro_sum, repro_sum_tol_exceeded, &
repro_sum_rel_diff_max, &
repro_sum_recompute
use cam_logfile
, only : iulog
use FVperf_module
, only : FVstartclock, FVstopclock
implicit none
! !INPUT PARAMETERS:
type (T_FVDYCORE_GRID), intent(in) :: grid
integer, intent(in) :: ltot ! number of quantities to be summed
real (r4) a(grid%ifirstxy:grid%ilastxy,ltot) ! input vector to be summed
! !OUTPUT PARAMETERS:
real (r8) sum(ltot) ! sum of all vector entries
! !DESCRIPTION:
! This subroutine calculates the sum of "a" in a reproducible
! (sequentialized) fashion which should give bit-wise identical
! results irrespective of the number of MPI processes.
!
! !REVISION HISTORY:
!
! WS 05.04.08 : Created from par_xsum
! WS 05.05.25 : removed ifirst, ilast, im as arguments (in grid)
! WS 06.06.28 : Fixed bug in sequential version
! PW 08.06.25 : added fixed point reproducible sum
!
!EOP
!---------------------------------------------------------------------
!BOC
! !Local
real(r8), parameter :: D0_0 = 0.0_r8
real(r8) :: a8(grid%ifirstxy:grid%ilastxy,ltot)
real(r8) :: rel_diff(2,ltot)
real(r4),allocatable :: l_a(:)
real(r4),allocatable :: a_tmp(:)
integer i,ipe,l,im,lim,nprxy_x,offset
integer sendcount(grid%nprxy_x)
integer recvcount(grid%nprxy_x)
logical :: write_warning
im = grid%im
lim = (grid%ilastxy-grid%ifirstxy) + 1
nprxy_x = grid%nprxy_x
offset = grid%ifirstxy - 1
call FVstartclock
(grid,'xsum_r4_repro_sum')
a8(:,:) = a(:,:)
call repro_sum
(a8, sum, lim, lim, ltot, gbl_count=im, &
commid=grid%commxy_x, rel_diff=rel_diff)
call FVstopclock
(grid,'xsum_r4_repro_sum')
! check that "fast" reproducible sum is accurate enough. If not, calculate
! using old method
write_warning = .false.
if (grid%myidxy_x == 0) write_warning = .true.
if ( repro_sum_tol_exceeded
('par_xsum_r4', ltot, write_warning, &
rel_diff) ) then
if ( repro_sum_recompute ) then
call FVstartclock
(grid,'xsum_r4_sumfix')
allocate( l_a(lim*nprxy_x) )
allocate( a_tmp(im) )
sendcount(:) = lim
do l=1,ltot
if (rel_diff(1,l) > repro_sum_rel_diff_max) then
sum(l) = D0_0
#if defined ( SPMD )
do ipe=1,nprxy_x
do i=1,lim
l_a(i+(ipe-1)*lim) = a(i+offset,l)
enddo
enddo
call parexchangevector
( grid%commxy_x, sendcount, l_a, &
recvcount, a_tmp )
do i=1,im
sum(l) = sum(l) + a_tmp(i)
enddo
#else
do i=1,im
sum(l) = sum(l) + a(i,l)
enddo
#endif
endif
enddo
deallocate( a_tmp )
deallocate( l_a )
call FVstopclock
(grid,'xsum_r4_sumfix')
endif
endif
return
!EOC
end subroutine par_xsum_r4
!-----------------------------------------------------------------------