!-----------------------------------------------------------------------
!BOP
! !ROUTINE: geopk --- Calculate geopotential to the kappa
!
!-----------------------------------------------------------------------
! There are three versions of geopk below. The first is the standard
! version and is typically used with transposes between yz and xy
! space. The second (called geopk16) operates in yz space and performs
! semi-global communication in the z direction (to avoid transposes).
! It can also use 16-byte reals to limit round-off error; this is
! enabled by setting DSIZE to 16 immediately below.
! The third version (called geopk_d) also operates in yz space
! and implements a ring-pipeline algorithm in the z direction.
! Its numerics are identical to those of the first version, without
! requiring 16-byte arithmetic. While less parallel, its communication
! costs are smaller, and it is often the fastest option.
!
! Note that the interfaces of the three versions differ slightly.
! Also, geopk (the standard version with transposes) is called for the
! D-grid during the last two small timesteps in cd_core.
! Geopk16 uses mod_comm communication calls; the old Pilgrim calls can
! be activated (for debugging) by defining PAREXCH immediately below.
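! For example, 16-byte intermediates in geopk16 are selected by commenting
! out "#define DSIZE 8" and uncommenting "#define DSIZE 16" below; the old
! Pilgrim exchanges are selected by uncommenting "#define PAREXCH".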
!#define PAREXCH
!#define DSIZE 16
#define DSIZE 8
#if (DSIZE == 16)
# define DTWO 2
#else
# define DTWO 1
#endif
!-----------------------------------------------------------------------
!
! !INTERFACE:
subroutine geopk(grid, pe, delp, pk, wz, hs, pt, cp, akap, nx)
use shr_kind_mod, only: r8 => shr_kind_r8
use dynamics_vars, only: T_FVDYCORE_GRID
implicit none
! !INPUT PARAMETERS:
type (T_FVDYCORE_GRID), intent(in) :: grid
integer nx ! # of pieces in longitude direction
real(r8) akap, cp
real(r8) hs(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy)
real(r8) pt(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km)
real(r8) delp(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km)
! !OUTPUT PARAMETERS:
real(r8) wz(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km+1) ! space N*1 S*1
real(r8) pk(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km+1) ! space N*1 S*1
real(r8) pe(grid%ifirstxy:grid%ilastxy,grid%km+1,grid%jfirstxy:grid%jlastxy)
! !DESCRIPTION:
! Calculates the geopotential and the pressure to the kappa. This is an
! expensive operation, and several output arrays are kept for later use.
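! In outline, for each column (interface 1 is at pressure ptop):
!   pe(k) = ptop + sum_{l<k} delp(l)                     hydrostatic pressure
!   pk(k) = pe(k)**akap                                  pressure to the kappa
!   wz(k) = hs + sum_{l>=k} cp*pt(l)*(pk(l+1)-pk(l))     geopotential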
!
! !REVISION HISTORY:
!
! WS 99.10.22: MPIed SJ's original SMP version
! SJL 00.01.01: Merged C-core and D-core computation
! SMP "decomposition" in E-W by combining i and j loops
! WS 00.12.01: Replaced MPI_ON with SPMD; hs now distributed
! AAM 01.06.27: Generalize for 2D decomposition
! AAM 01.07.24: Removed dpcheck
! WS 04.10.07: Simplified interface using Grid as input argument
! WS 05.05.25: Merged CAM and GEOS5 versions (mostly CAM)
!
!EOP
!---------------------------------------------------------------------
!BOC
! Local:
real(r8), parameter :: D0_0 = 0.0_r8
integer :: im, jm, km, jfirst, jlast, ifirst, ilast
real(r8) :: ptop
integer i, j, k
integer ixj, jp, it, i1, i2, nxu, itot
real(r8) delpp(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km)
ptop = grid%ptop
im = grid%im
jm = grid%jm
km = grid%km
ifirst = grid%ifirstxy
ilast = grid%ilastxy
jfirst = grid%jfirstxy
jlast = grid%jlastxy
itot = ilast - ifirst + 1
! nxu = nx
nxu = 1
it = itot / nxu
jp = nxu * ( jlast - jfirst + 1 )
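! The i and j loops are combined into a single index ixj, giving
! jp = nxu*(jlast-jfirst+1) independent work units for the parallel loop
! below; each unit covers longitudes i1..i2 of latitude j.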
!$omp parallel do &
!$omp default(shared) &
!$omp private(i1, i2, ixj, i, j, k )
! do 2000 j=jfirst,jlast
do 2000 ixj=1, jp
j = jfirst + (ixj-1)/nxu
i1 = ifirst + it * mod(ixj-1, nxu)
i2 = i1 + it - 1
do i=i1,i2
pe(i,1,j) = D0_0
wz(i,j,km+1) = D0_0
enddo
! Top down
do k=2,km+1
do i=i1,i2
pe(i,k,j) = pe(i,k-1,j) + delp(i,j,k-1)
enddo
enddo
do k=1,km+1
do i=i1,i2
pe(i,k,j) = pe(i,k,j) + ptop
pk(i,j,k) = pe(i,k,j)**akap
enddo
enddo
! Bottom up
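! Hydrostatic integration: the geopotential increments cp*pt*(pk(k+1)-pk(k))
! are accumulated from the bottom interface upward, and the surface
! geopotential hs is then added at every interface.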
do k=1,km
do i=i1,i2
delpp(i,j,k) = cp*pt(i,j,k)*(pk(i,j,k+1)-pk(i,j,k))
enddo
enddo
do k=km,1,-1
do i=i1,i2
wz(i,j,k) = wz(i,j,k+1)+delpp(i,j,k)
enddo
enddo
do k=1,km+1
do i=i1,i2
wz(i,j,k) = wz(i,j,k)+hs(i,j)
enddo
enddo
2000 continue
return
!EOC
end subroutine geopk
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: geopk16 --- Calculate geopotential to the kappa
!
! !INTERFACE:
subroutine geopk16(grid, pe, delp, pk, wz, hs, pt, ng, cp, akap )
use shr_kind_mod, only : r8 => shr_kind_r8, i8 => shr_kind_i8
use decompmodule, only : decomptype
use dynamics_vars, only : T_FVDYCORE_GRID
#if defined( SPMD )
use parutilitiesmodule, only : parexchangevector
use mod_comm, only : blockdescriptor, get_partneroffset, &
                     mp_sendirr, mp_recvirr, max_nparcels
use spmd_dyn, only: npes_yz
#endif
implicit none
#if defined ( SPMD )
#include "mpif.h"
#endif
! !INPUT PARAMETERS:
type (T_FVDYCORE_GRID), intent(in) :: grid
integer, intent(in) :: ng ! Halo size (not always = ng_d)
real(r8) akap, cp
real(r8) hs(1:grid%im,grid%jfirst:grid%jlast)
! !INPUT PARAMETERS:
real(r8) pt(1:grid%im,grid%jfirst-ng:grid%jlast+ng,grid%kfirst:grid%klast)
real(r8) delp(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)
! !OUTPUT PARAMETERS:
real(r8) wz(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1) ! space N*1 S*1
real(r8) pk(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1) ! space N*1 S*1
real(r8) pe(1:grid%im,grid%kfirst:grid%klast+1,grid%jfirst:grid%jlast) ! temporary variable
! !DESCRIPTION:
! Calculates the geopotential and the pressure to the kappa. This is an
! expensive operation, and several output arrays are kept for later use.
! To preserve accuracy through round-off, 16-byte reals may be used for
! some intermediate data (selected via DSIZE above).
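! Each z subdomain first forms local partial sums of delp (top-down) and of
! cp*pt*d(pk) (bottom-up); the subdomain totals are then exchanged in the
! z direction and added in, so that no yz-to-xy transpose is required.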
!
! !REVISION HISTORY:
!
! AAM 00.12.18: Original version
! AAM 03.01.21: Use mod_comm
! WS 03.11.19: Merged latest CAM version (by AAM)
! WS 04.10.07: Simplified interface using Grid as input argument
! WS 05.05.17: Merged CAM and GEOS5 versions
!
!EOP
!---------------------------------------------------------------------
!BOC
#ifndef NO_CRAY_POINTERS
! Local:
integer :: i, j, k, nk, ijtot, ierror, ione
integer :: im,jm,km, ifirst, ilast, jfirst, jlast, kfirst, klast
real(r8):: ptop
integer :: npr_y, npr_z, myid_y, myid_z
integer :: twod_decomp, mod_geopk
#if (DSIZE == 16)
#ifdef NO_R16
integer,parameter :: r16= selected_real_kind(12) ! 8 byte real
#else
integer,parameter :: r16= selected_real_kind(24) ! 16 byte real
#endif
real(r16), parameter :: DP0_0 = 0.0_r16
real(r16) delp16(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)
real(r16) pe16(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1)
real(r16) inbuf(1:grid%im,grid%jfirst:grid%jlast,0:grid%npr_z-1)
real(r16) outbuf(1:grid%im,grid%jfirst:grid%jlast,0:grid%npr_z-1)
#else
real (r8), parameter :: DP0_0 = 0.0_r8
real (r8) delp16(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)
real (r8) pe16(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1)
real (r8) inbuf(1:grid%im,grid%jfirst:grid%jlast,0:grid%npr_z-1)
real (r8) outbuf(1:grid%im,grid%jfirst:grid%jlast,0:grid%npr_z-1)
#endif
integer sendcount(0:grid%npr_z-1), recvcount(0:grid%npr_z-1)
#if defined(SPMD)
!
! data structures for mp_sendirr, mp_recvirr
!
type (blockdescriptor), allocatable, save :: sendbl1(:), recvbl1(:)
type (blockdescriptor), allocatable, save :: sendbl2(:), recvbl2(:)
#endif
integer first_time_through
data first_time_through / 0 /
! Arrays inbuf8 and outbuf8 are declared so that the compiler accepts them
! as actual arguments to parexchangevector (and mp_sendirr/mp_recvirr).
! The Cray pointers below associate them with pe16/inbuf and outbuf.
real (r8) inbuf8(1), outbuf8(1)
pointer (ptr_inbuf8, inbuf8)
pointer (ptr_outbuf8, outbuf8)
integer (i8) locinbuf, locoutbuf
!
! Initialize variables from Grid
!
ptop = grid%ptop
im = grid%im
jm = grid%jm
km = grid%km
ifirst = 1 ! 2004.10.04 (WS): Now hardwired for 1..im
ilast = grid%im ! Code was always used in this mode
jfirst = grid%jfirst
jlast = grid%jlast
kfirst = grid%kfirst
klast = grid%klast
myid_y = grid%myid_y
myid_z = grid%myid_z
npr_y = grid%npr_y
npr_z = grid%npr_z
twod_decomp = grid%twod_decomp
mod_geopk = grid%mod_geopk
ijtot = (jlast-jfirst+1) * (ilast-ifirst+1)
#if defined (SPMD)
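!
! On the first call, build the mod_comm block descriptors: sendbl1/recvbl1
! forward each subdomain's accumulated delp (its interface klast+1) to all
! tasks with larger myid_z; sendbl2/recvbl2 forward the accumulated
! geopotential increments (interface kfirst) to all tasks with smaller myid_z.
!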
if (first_time_through .eq. 0) then
first_time_through = 1
ione = 1
if (npr_z .gt. 1) then
allocate( sendbl1(0:npes_yz-1) )
allocate( recvbl1(0:npes_yz-1) )
allocate( sendbl2(0:npes_yz-1) )
allocate( recvbl2(0:npes_yz-1) )
do nk = 0,npes_yz-1
sendbl1(nk)%method = mod_geopk
sendbl2(nk)%method = mod_geopk
recvbl1(nk)%method = mod_geopk
recvbl2(nk)%method = mod_geopk
! allocate for either method (safety)
allocate( sendbl1(nk)%blocksizes(1) )
allocate( sendbl1(nk)%displacements(1) )
allocate( recvbl1(nk)%blocksizes(1) )
allocate( recvbl1(nk)%displacements(1) )
allocate( sendbl2(nk)%blocksizes(1) )
allocate( sendbl2(nk)%displacements(1) )
allocate( recvbl2(nk)%blocksizes(1) )
allocate( recvbl2(nk)%displacements(1) )
sendbl1(nk)%type = MPI_DATATYPE_NULL
if ( (nk/npr_y) > myid_z .and. mod(nk,npr_y) == myid_y ) then
if (mod_geopk .ne. 0) then
call MPI_TYPE_INDEXED(ione, DTWO*ijtot, &
DTWO*ijtot*(klast-kfirst+1), MPI_DOUBLE_PRECISION, &
sendbl1(nk)%type, ierror)
call MPI_TYPE_COMMIT(sendbl1(nk)%type, ierror)
endif
sendbl1(nk)%blocksizes(1) = DTWO*ijtot
sendbl1(nk)%displacements(1) = DTWO*ijtot*(klast-kfirst+1)
sendbl1(nk)%partneroffset = myid_z * ijtot * DTWO
else
sendbl1(nk)%blocksizes(1) = 0
sendbl1(nk)%displacements(1) = 0
sendbl1(nk)%partneroffset = 0
endif
sendbl1(nk)%nparcels = size(sendbl1(nk)%displacements)
sendbl1(nk)%tot_size = sum(sendbl1(nk)%blocksizes)
max_nparcels = max(max_nparcels, sendbl1(nk)%nparcels)
recvbl1(nk)%type = MPI_DATATYPE_NULL
if ( (nk/npr_y) < myid_z .and. mod(nk,npr_y) == myid_y ) then
if (mod_geopk .ne. 0) then
call MPI_TYPE_INDEXED(ione, DTWO*ijtot, &
nk/npr_y * ijtot * DTWO, MPI_DOUBLE_PRECISION, &
recvbl1(nk)%type, ierror)
call MPI_TYPE_COMMIT(recvbl1(nk)%type, ierror)
endif
recvbl1(nk)%blocksizes(1) = DTWO*ijtot
recvbl1(nk)%displacements(1) = nk/npr_y * ijtot * DTWO
recvbl1(nk)%partneroffset = 0
else
recvbl1(nk)%blocksizes(1) = 0
recvbl1(nk)%displacements(1) = 0
recvbl1(nk)%partneroffset = 0
endif
recvbl1(nk)%nparcels = size(recvbl1(nk)%displacements)
recvbl1(nk)%tot_size = sum(recvbl1(nk)%blocksizes)
max_nparcels = max(max_nparcels, recvbl1(nk)%nparcels)
if ( (nk/npr_y) < myid_z .and. mod(nk,npr_y) == myid_y ) then
call MPI_TYPE_INDEXED(ione, DTWO*ijtot, &
0, MPI_DOUBLE_PRECISION, &
sendbl2(nk)%type, ierror)
call MPI_TYPE_COMMIT(sendbl2(nk)%type, ierror)
sendbl2(nk)%blocksizes(1) = DTWO*ijtot
sendbl2(nk)%displacements(1) = 0
sendbl2(nk)%partneroffset = (myid_z-nk/npr_y-1) * ijtot * DTWO
else
sendbl2(nk)%type = MPI_DATATYPE_NULL
sendbl2(nk)%blocksizes(1) = 0
sendbl2(nk)%displacements(1) = 0
sendbl2(nk)%partneroffset = 0
endif
sendbl2(nk)%nparcels = size(sendbl2(nk)%displacements)
sendbl2(nk)%tot_size = sum(sendbl2(nk)%blocksizes)
max_nparcels = max(max_nparcels, sendbl2(nk)%nparcels)
if ( (nk/npr_y) > myid_z .and. mod(nk,npr_y) == myid_y ) then
call MPI_TYPE_INDEXED(ione, DTWO*ijtot, &
nk/npr_y * ijtot * DTWO, MPI_DOUBLE_PRECISION, &
recvbl2(nk)%type, ierror)
call MPI_TYPE_COMMIT(recvbl2(nk)%type, ierror)
recvbl2(nk)%blocksizes(1) = DTWO*ijtot
recvbl2(nk)%displacements(1) = nk/npr_y * ijtot * DTWO
recvbl2(nk)%partneroffset = 0
else
recvbl2(nk)%type = MPI_DATATYPE_NULL
recvbl2(nk)%blocksizes(1) = 0
recvbl2(nk)%displacements(1) = 0
recvbl2(nk)%partneroffset = 0
endif
recvbl2(nk)%nparcels = size(recvbl2(nk)%displacements)
recvbl2(nk)%tot_size = sum(recvbl2(nk)%blocksizes)
max_nparcels = max(max_nparcels, recvbl2(nk)%nparcels)
enddo
call get_partneroffset( grid%commyz, sendbl1, recvbl1 )
call get_partneroffset( grid%commyz, sendbl2, recvbl2 )
endif
endif
#if (!defined PAREXCH)
locinbuf = loc(pe16)
#else
locinbuf = loc(inbuf)
#endif
locoutbuf = loc(outbuf)
ptr_inbuf8 = locinbuf
ptr_outbuf8 = locoutbuf
#endif
! Top down
#if (DSIZE == 16)
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k)
do k = kfirst,klast
do j = jfirst,jlast
do i = ifirst,ilast
delp16(i,j,k) = delp(i,j,k)
enddo
enddo
enddo
#endif
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j)
do j = jfirst,jlast
do i = ifirst,ilast
pe16(i,j,kfirst) = DP0_0
enddo
enddo
! compute partial sums
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k)
do j = jfirst,jlast
do k = kfirst+1,klast+1
do i = ifirst,ilast
#if (DSIZE == 16)
pe16(i,j,k) = pe16(i,j,k-1) + delp16(i,j,k-1)
#else
pe16(i,j,k) = pe16(i,j,k-1) + delp(i,j,k-1)
#endif
enddo
enddo
enddo
#if defined( SPMD )
if (npr_z .gt. 1) then
! communicate upward
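! Each task sends pe16(:,:,klast+1), its local sum of delp, to the tasks
! with larger myid_z; contributions received from smaller-myid_z tasks are
! then added to all local interfaces.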
# if !defined (PAREXCH)
call mp_sendirr( grid%commyz, sendbl1, recvbl1, inbuf8, outbuf8, &
                 modc=grid%modc_cdcore )
call mp_recvirr( grid%commyz, sendbl1, recvbl1, inbuf8, outbuf8, &
                 modc=grid%modc_cdcore )
# else
do nk = 0, npr_z-1
sendcount(nk) = 0
recvcount(nk) = 0
enddo
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, nk)
do nk = myid_z+1, npr_z-1
do j = jfirst,jlast
do i = ifirst,ilast
inbuf(i,j,nk-myid_z-1) = pe16(i,j,klast+1)
enddo
enddo
! Scale sendcount by DTWO (2 when the quantities are 16 bytes long)
sendcount(nk) = DTWO*ijtot
enddo
call parexchangevector( grid%comm_z, sendcount, inbuf8, recvcount, outbuf8 )
# endif
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k, nk)
do k = kfirst,klast+1
do nk = 0, myid_z-1
do j = jfirst,jlast
do i = ifirst,ilast
pe16(i,j,k) = pe16(i,j,k) + outbuf(i,j,nk)
enddo
enddo
enddo
enddo
endif
#endif
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k)
do k = kfirst,klast+1
do j = jfirst,jlast
do i = ifirst,ilast
pe(i,k,j) = pe16(i,j,k) + ptop
pk(i,j,k) = pe(i,k,j) ** akap
enddo
enddo
enddo
! Bottom up
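! Local hydrostatic integration: accumulate cp*pt*(pk(k+1)-pk(k)) upward
! within this k range; contributions from larger-myid_z subdomains are added
! in the "communicate downward" step, and hs is added at the end.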
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k)
do k = kfirst,klast
do j = jfirst,jlast
do i = ifirst,ilast
delp16(i,j,k) = cp*pt(i,j,k)*(pk(i,j,k+1)-pk(i,j,k))
enddo
enddo
enddo
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j)
do j = jfirst,jlast
do i = ifirst,ilast
pe16(i,j,klast+1) = DP0_0
enddo
enddo
! compute partial sums
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k)
do j = jfirst,jlast
do k = klast,kfirst,-1
do i = ifirst,ilast
pe16(i,j,k) = pe16(i,j,k+1) + delp16(i,j,k)
enddo
enddo
enddo
#if defined( SPMD )
if (npr_z .gt. 1) then
! communicate downward
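! Each task sends pe16(:,:,kfirst), its local sum of cp*pt*d(pk), to the
! tasks with smaller myid_z; contributions received from larger-myid_z tasks
! are then added to all local interfaces.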
# if !defined (PAREXCH)
call mp_sendirr( grid%commyz, sendbl2, recvbl2, inbuf8, outbuf8, &
                 modc=grid%modc_cdcore )
call mp_recvirr( grid%commyz, sendbl2, recvbl2, inbuf8, outbuf8, &
                 modc=grid%modc_cdcore )
# else
do nk = 0, npr_z-1
sendcount(nk) = 0
recvcount(nk) = 0
enddo
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, nk)
do nk = 0, myid_z-1
do j = jfirst,jlast
do i = ifirst,ilast
inbuf(i,j,nk) = pe16(i,j,kfirst)
enddo
enddo
! Scale sendcount by DTWO (2 when the quantities are 16 bytes long)
sendcount(nk) = DTWO*ijtot
enddo
call parexchangevector( grid%comm_z, sendcount, inbuf8, recvcount, outbuf8 )
# endif
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k, nk)
do k = kfirst,klast+1
do nk = myid_z+1, npr_z-1
do j = jfirst,jlast
do i = ifirst,ilast
# if !defined (PAREXCH)
pe16(i,j,k) = pe16(i,j,k) + outbuf(i,j,nk)
# else
pe16(i,j,k) = pe16(i,j,k) + outbuf(i,j,nk-myid_z-1)
# endif
enddo
enddo
enddo
enddo
endif
#endif
!$omp parallel do &
!$omp default(shared) &
!$omp private(i, j, k)
do k = kfirst,klast+1
do j = jfirst,jlast
do i = ifirst,ilast
wz(i,j,k) = pe16(i,j,k) + hs(i,j)
enddo
enddo
enddo
return
! endif for NO_CRAY_POINTERS
#endif
!EOC
end subroutine geopk16
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: geopk_d --- Calculate geopotential to the kappa
!
! !INTERFACE:
subroutine geopk_d( grid, pe, delp, pk, wz, hs, pt, ng, cp, akap )
use shr_kind_mod, only : r8 => shr_kind_r8, i8 => shr_kind_i8
use dynamics_vars, only : T_FVDYCORE_GRID
implicit none
#if defined ( SPMD )
#include "mpif.h"
#endif
! !INPUT PARAMETERS:
type (T_FVDYCORE_GRID), intent(in) :: grid
integer, intent(in) :: ng ! Halo size (not always = ng_d)
real(r8) akap, cp
real(r8) hs(1:grid%im,grid%jfirst:grid%jlast)
! !INPUT PARAMETERS:
real(r8) pt(1:grid%im,grid%jfirst-ng:grid%jlast+ng,grid%kfirst:grid%klast)
real(r8) delp(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)
! !OUTPUT PARAMETERS:
real(r8) wz(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1) ! space N*1 S*1
real(r8) pk(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1) ! space N*1 S*1
real(r8) pe(1:grid%im,grid%kfirst:grid%klast+1,grid%jfirst:grid%jlast) ! temporary variable
! !DESCRIPTION:
! Calculates the geopotential and the pressure to the kappa. This is an
! expensive operation, and several output arrays are kept for later use.
! To preserve reproducibility, the operation ordering of the transpose-based
! geopk algorithm is retained, at the cost of serializing the computation in
! the z direction.
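! Each z subdomain waits for the running sum at its first interface from its
! k predecessor, adds its own layers, and passes the value at its last
! interface to its k successor (and vice versa for the bottom-up sweep).
! The horizontal domain is processed in blocks so that the pipeline of z
! subdomains stays filled.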
!
! !REVISION HISTORY:
!
! PW 08.06.27: Original: simple ring r8 version of geopk16 -
! serialized Z-direction but minimized communication overhead
!
!EOP
!---------------------------------------------------------------------
!BOC
! Local:
real(r8):: ptop
integer :: km, ifirst, ilast, jfirst, jlast, kfirst, klast
integer :: npr_z
integer :: itot, jtot
integer :: n_blocks
logical :: sendd
integer :: i, il, ilmax, ib, iblksiz
integer :: j, jl, jlmax, jb, jblksiz
integer :: k, block, ierror
integer :: klimits(2), klimits_all(2,0:grid%npr_z-1)
integer, save :: k_succ_pid, k_pred_pid
integer, allocatable :: rcvreq(:), sndreq(:)
real (r8), parameter :: DP0_0 = 0.0_r8
real (r8) l_pe(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1)
real (r8) l_delp(1:grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)
real (r8) inbuf(1:grid%im,grid%jfirst:grid%jlast,0:grid%npr_z-1)
real (r8) outbuf(1:grid%im,grid%jfirst:grid%jlast,0:grid%npr_z-1)
integer sendcount(0:grid%npr_z-1), recvcount(0:grid%npr_z-1)
integer first_time_through
data first_time_through / 0 /
#if defined ( SPMD )
integer status (MPI_STATUS_SIZE) ! Status of message
#endif
!
! Initialize variables from Grid
!
ptop = grid%ptop
km = grid%km
ifirst = 1 ! 2004.10.04 (WS): Now hardwired for 1..im
ilast = grid%im ! Code was always used in this mode
jfirst = grid%jfirst
jlast = grid%jlast
kfirst = grid%kfirst
klast = grid%klast
npr_z = grid%npr_z
itot = (ilast-ifirst+1)
jtot = (jlast-jfirst+1)
if (grid%modc_cdcore(3) .eq. 1) then
sendd = .true.
else
sendd = .false.
endif
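! The (i,j) domain is split into up to n_blocks pieces (grid%geopkblocks);
! per-block receives, sums, and sends let the k successor start on one block
! while this task is still summing the next, keeping the ring pipeline busy.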
n_blocks = max(1,grid%geopkblocks)
if (n_blocks < jtot) then
jblksiz = ceiling(float(jtot)/float(n_blocks))
iblksiz = itot
else
jblksiz = 1
iblksiz = ceiling(float(itot*jtot)/float(n_blocks))
endif
block = 0
do j=jfirst,jlast,jblksiz
do i=ifirst,ilast,iblksiz
block = block + 1
enddo
enddo
allocate( sndreq(block) )
allocate( rcvreq(block) )
if (first_time_through .eq. 0) then
first_time_through = 1
k_pred_pid = -1
k_succ_pid = -1
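! Identify the tasks holding the adjacent k ranges: k_pred_pid owns the range
! ending at kfirst-1, k_succ_pid the range starting at klast+1; a value of -1
! marks the top or bottom of the column.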
#if defined (SPMD)
klimits(1) = kfirst
klimits(2) = klast
call mpi_allgather (klimits, 2, mpi_integer, &
klimits_all, 2, mpi_integer, &
grid%comm_z, ierror)
do i=0,npr_z-1
if (klimits_all(2,i) == kfirst-1) k_pred_pid = i
if (klimits_all(1,i) == klast+1) k_succ_pid = i
enddo
#endif
endif
! Top down
! prepost first set of receive requests
#if defined (SPMD)
if (k_pred_pid /= -1) then
block = 0
do j=jfirst,jlast,jblksiz
if (j+jblksiz > jlast) then
jb = jlast-j+1
else
jb = jblksiz
endif
do i=ifirst,ilast,iblksiz
if (i+iblksiz > ilast) then
ib = ilast-i+1
else
ib = iblksiz
endif
block = block + 1
call mpi_irecv (l_pe(i,j,kfirst), jb*ib, &
mpi_real8, k_pred_pid, block, &
grid%comm_z, rcvreq(block), ierror)
enddo
enddo
endif
#endif
block = 0
do j=jfirst,jlast,jblksiz
if (j+jblksiz > jlast) then
jb = jlast-j+1
else
jb = jblksiz
endif
jlmax = j+jb-1
do i=ifirst,ilast,iblksiz
if (i+iblksiz > ilast) then
ib = ilast-i+1
else
ib = iblksiz
endif
ilmax = i+ib-1
block = block + 1
! get data from k predecessor
if (k_pred_pid /= -1) then
#if defined (SPMD)
call mpi_wait (rcvreq(block), status, ierror)
#endif
else
do jl=j,jlmax
do il = i,ilmax
l_pe(il,jl,kfirst) = DP0_0
enddo
enddo
endif
! compute partial sums (note: the k-loop cannot be threaded)
do k = kfirst+1,klast+1
do jl=j,jlmax
do il = i,ilmax
l_pe(il,jl,k) = l_pe(il,jl,k-1) + delp(il,jl,k-1)
enddo
enddo
enddo
! send results to k successor
#if defined (SPMD)
if (k_succ_pid /= -1) then
if (sendd) then
call mpi_send (l_pe(i,j,klast+1), jb*ib, mpi_real8, &
k_succ_pid, block, grid%comm_z, &
ierror)
else
call mpi_isend (l_pe(i,j,klast+1), jb*ib, mpi_real8, &
k_succ_pid, block, grid%comm_z, &
sndreq(block), ierror)
endif
endif
#endif
!$omp parallel do &
!$omp default(shared) &
!$omp private(il, jl, k)
do k = kfirst,klast+1
do jl = j,jlmax
do il = i,ilmax
pe(il,k,jl) = l_pe(il,jl,k) + ptop
pk(il,jl,k) = pe(il,k,jl) ** akap
enddo
enddo
enddo
#if defined (SPMD)
if (k_succ_pid /= -1) then
if (.not. sendd) then
call mpi_wait (sndreq(block), status, ierror)
endif
endif
#endif
enddo
enddo
! Bottom up
! prepost second set of receive requests
#if defined (SPMD)
if (k_succ_pid /= -1) then
block = 0
do j=jfirst,jlast,jblksiz
if (j+jblksiz > jlast) then
jb = jlast-j+1
else
jb = jblksiz
endif
do i=ifirst,ilast,iblksiz
if (i+iblksiz > ilast) then
ib = ilast-i+1
else
ib = iblksiz
endif
block = block + 1
call mpi_irecv (l_pe(i,j,klast+1), jb*ib, &
mpi_real8, k_succ_pid, block, &
grid%comm_z, rcvreq(block), ierror)
enddo
enddo
endif
#endif
block = 0
do j=jfirst,jlast,jblksiz
if (j+jblksiz > jlast) then
jb = jlast-j+1
else
jb = jblksiz
endif
jlmax = j+jb-1
do i=ifirst,ilast,iblksiz
if (i+iblksiz > ilast) then
ib = ilast-i+1
else
ib = iblksiz
endif
ilmax = i+ib-1
block = block + 1
!$omp parallel do &
!$omp default(shared) &
!$omp private(il, jl, k)
do k = kfirst,klast
do jl=j,jlmax
do il = i,ilmax
l_delp(il,jl,k) = &
cp*pt(il,jl,k)*(pk(il,jl,k+1)-pk(il,jl,k))
enddo
enddo
enddo
! get data from k successor
if (k_succ_pid /= -1) then
#if defined (SPMD)
call mpi_wait (rcvreq(block), status, ierror)
#endif
else
do jl=j,jlmax
do il = i,ilmax
l_pe(il,jl,klast+1) = DP0_0
enddo
enddo
endif
! compute partial sums (note: the k-loop cannot be threaded)
do k = klast,kfirst,-1
do jl=j,jlmax
do il = i,ilmax
l_pe(il,jl,k) = l_pe(il,jl,k+1) + l_delp(il,jl,k)
enddo
enddo
enddo
! send results to k predecessor
#if defined (SPMD)
if (k_pred_pid /= -1) then
if (sendd) then
call mpi_send (l_pe(i,j,kfirst), jb*ib, mpi_real8, &
k_pred_pid, block, &
grid%comm_z, ierror)
else
call mpi_isend (l_pe(i,j,kfirst), jb*ib, mpi_real8, &
k_pred_pid, block, &
grid%comm_z, sndreq(block), ierror)
endif
endif
#endif
!$omp parallel do &
!$omp default(shared) &
!$omp private(il, jl, k)
do k = kfirst,klast+1
do jl=j,jlmax
do il = i,ilmax
wz(il,jl,k) = l_pe(il,jl,k) + hs(il,jl)
enddo
enddo
enddo
#if defined (SPMD)
if (k_pred_pid /= -1) then
if (.not. sendd) then
call mpi_wait (sndreq(block), status, ierror)
endif
endif
#endif
enddo
enddo
deallocate( sndreq )
deallocate( rcvreq )
return
!EOC
end subroutine geopk_d
!-----------------------------------------------------------------------