#if !defined(STAND_ALONE)
#endif
#define _SMEMORY 1
!-----------------------------------------------------------------------
! Nasa/GSFC, Data Assimilation Office, Code 910.3, GEOS/DAS
!-----------------------------------------------------------------------
MODULE parutilitiesmodule
#if defined( SPMD )
!BOP
!
! !MODULE: parutilitiesmodule
!
! !USES:
#if defined( STAND_ALONE )
# define iulog 6
#else
   USE cam_logfile, ONLY: iulog
#endif
#if !defined(STAND_ALONE)
   USE shr_kind_mod, ONLY: r8 => shr_kind_r8, i8 => shr_kind_i8,       &
                           r4 => shr_kind_r4
#endif
   USE mod_comm, ONLY : commglobal, gid, numpro, blockdescriptor,      &
                        max_nparcels
#include "debug.h"
   IMPLICIT NONE
#include "mpif.h"
#include "pilgrim.h"
!
! !PUBLIC DATA MEMBERS:
   PUBLIC Gsize
   PUBLIC INT4, REAL4, REAL8
   PUBLIC SUMOP, MAXOP, MINOP, BCSTOP

   INTEGER, SAVE :: GSize            ! Size of communicator CommGlobal
                                     ! (equivalent to mod_comm::numpro)

! Internal operation codes used to select the collective operation
#define CPP_SUM_OP  101
#define CPP_MAX_OP  102
#define CPP_MIN_OP  103
#define CPP_BCST_OP 104

! Public aliases for MPI datatypes and reduction operations, so that
! callers need not include mpif.h themselves
   INTEGER, SAVE :: INT4   = MPI_INTEGER
   INTEGER, SAVE :: REAL4  = MPI_REAL
   INTEGER, SAVE :: REAL8  = MPI_DOUBLE_PRECISION
   INTEGER, SAVE :: SUMOP  = MPI_SUM
   INTEGER, SAVE :: MAXOP  = MPI_MAX
   INTEGER, SAVE :: MINOP  = MPI_MIN
   INTEGER, SAVE :: BCSTOP = CPP_BCST_OP
! !PUBLIC MEMBER FUNCTIONS:
PUBLIC ParPatternType
! Communication pattern: a private duplicate of the communicator plus,
! for every PE in it, descriptors of the parcels exchanged with that PE.
TYPE ParPatternType
INTEGER :: Comm ! Communicator (duplicated at pattern creation)
INTEGER :: Iam ! My rank in communicator
INTEGER :: Size ! Size of communicator
TYPE(BlockDescriptor), POINTER :: SendDesc(:) ! Send descriptor per PE
TYPE(BlockDescriptor), POINTER :: RecvDesc(:) ! Receive descriptor per PE
END TYPE ParPatternType
#ifdef _SMEMORY
! Segment/neighbor counts used to size temporary arrays when creating
! communication patterns with reduced memory usage (see ParCalcInfo).
TYPE ParInfoType
INTEGER :: numRecvSeg ! number of received segments
INTEGER :: numSendSeg ! number of send segments
INTEGER :: maxNumSeg ! maximum number of segments over all processors
INTEGER :: numRecvNeigh ! number of receive neighbors
INTEGER :: numSendNeigh ! number of send neighbors
END TYPE ParInfoType
#endif
PUBLIC ParInit, ParSplit, ParFree, ParExit
PUBLIC ParScatter, ParGather
PUBLIC ParBeginTransfer, ParEndTransfer
PUBLIC ParExchangeVector, ParCollective
PUBLIC ParPatternCreate, ParPatternFree

! Create a communication pattern: by copy/replication, from a ghost
! definition, or between (possibly ghosted) decompositions
INTERFACE ParPatternCreate
   MODULE PROCEDURE ParPatternCopy
   MODULE PROCEDURE ParPatternGhost
   MODULE PROCEDURE ParPatternDecompToDecomp
   MODULE PROCEDURE ParPatternDecompToGhost
   MODULE PROCEDURE ParPatternGhostToDecomp
   MODULE PROCEDURE ParPatternGhostToGhost
END INTERFACE

! Scatter a global slice to local slices
INTERFACE ParScatter
   MODULE PROCEDURE ParScatterReal
   MODULE PROCEDURE ParScatterReal4
   MODULE PROCEDURE ParScatterInt
END INTERFACE

! Gather local slices into one global slice
INTERFACE ParGather
   MODULE PROCEDURE ParGatherReal
   MODULE PROCEDURE ParGatherReal4
   MODULE PROCEDURE ParGatherInt
END INTERFACE

! Initiate an all-to-all parcel transfer
INTERFACE ParBeginTransfer
   MODULE PROCEDURE ParBeginTransferReal
   MODULE PROCEDURE ParBeginTransferPattern1D
   MODULE PROCEDURE ParBeginTransferPattern1Dint
   MODULE PROCEDURE ParBeginTransferPattern2D
   MODULE PROCEDURE ParBeginTransferPattern3D
   MODULE PROCEDURE ParBeginTransferPattern4D
!  MODULE PROCEDURE ParBeginTransferInt
END INTERFACE

! Complete an all-to-all parcel transfer
INTERFACE ParEndTransfer
   MODULE PROCEDURE ParEndTransferReal
   MODULE PROCEDURE ParEndTransferPattern1D
   MODULE PROCEDURE ParEndTransferPattern1Dint
   MODULE PROCEDURE ParEndTransferPattern2D
   MODULE PROCEDURE ParEndTransferPattern3D
   MODULE PROCEDURE ParEndTransferPattern4D
!  MODULE PROCEDURE ParEndTransferInt
END INTERFACE

! Blocking exchange of variable-length vectors among all PEs
INTERFACE ParExchangeVector
   MODULE PROCEDURE ParExchangeVectorReal
   MODULE PROCEDURE ParExchangeVectorReal4
   MODULE PROCEDURE ParExchangeVectorInt
END INTERFACE

! Collective operation (barrier / reduction / broadcast) across a
! communicator, for various ranks and datatypes
INTERFACE ParCollective
   MODULE PROCEDURE ParCollectiveBarrier
   MODULE PROCEDURE ParCollective0D
   MODULE PROCEDURE ParCollective1D
   MODULE PROCEDURE ParCollective1DReal4
   MODULE PROCEDURE ParCollective2D
   MODULE PROCEDURE ParCollective2DReal4
   MODULE PROCEDURE ParCollective3D
   MODULE PROCEDURE ParCollective0DInt
   MODULE PROCEDURE ParCollective0DStr
   MODULE PROCEDURE ParCollective1DInt
   MODULE PROCEDURE ParCollective1DStr
   MODULE PROCEDURE ParCollective2DInt
END INTERFACE

#ifdef _SMEMORY
! Compute segment/neighbor counts (ParInfoType) so that pattern
! creation can size its temporaries tightly
INTERFACE ParCalcInfo
   MODULE PROCEDURE ParCalcInfoDecompToGhost
   MODULE PROCEDURE ParCalcInfoDecompToDecomp
   MODULE PROCEDURE ParCalcInfoGhostToGhost
   MODULE PROCEDURE ParCalcInfoGhostToDecomp
END INTERFACE
#endif
!
! !DESCRIPTION:
!
! This module provides the basic utilities to support parallelism
! on a distributed or shared memory multiprocessor.
!
! \begin{center}
! \begin{tabular}{|l|l|} \hline \hline
! ParInit & Initialize the parallel system \\ \hline
! ParExit & Exit from the parallel system \\ \hline
! ParSplit & Create a Compute grid of PEs \\ \hline
! ParFree & Free a split communicator \\ \hline
! ParScatter & Scatter global slice to local slices \\ \hline
! ParGather & Gather local slices to one global \\ \hline
! ParBeginTransfer & Initiate an all-to-all packet transfer \\ \hline
! ParEndTransfer & Complete an all-to-all packet transfer \\ \hline
! ParExchangeVector & Blocking exchange of variable-length vectors \\ \hline
! ParCollective & Collective operation across communicator \\ \hline
! \end{tabular}
! \end{center}
! \vspace{2mm}
!
! Other utilities can be added to this module as needs evolve.
!
! Conceptually the intention is to aggregate as many of the
! MPI communication calls as possible into a well-maintained
! module. This will help avoid the occurrence of MPI spaghetti
! code.
!
! This module is tailored to GEOS DAS and implements the
! design of Lucchesi/Mirin/Sawyer/Larson.
!
! !REVISION HISTORY:
! 97.02.01 Sawyer Creation
! 97.07.22 Sawyer Removal of DecompType related subroutines
! 97.08.13 Sawyer Added ParScatter/Gather for Integers
! 97.09.26 Sawyer Additions of Sparse communication primitives
! 97.12.01 Sawyer Changed all MPI_SSEND to MPI_ISEND
! 97.12.23 Lucchesi Added member variables IsIONode and InterComm
! 98.01.06 Sawyer Additions from RL for I/O Nodes
! 98.02.02 Sawyer Added the Cartesian data members
! 98.02.05 Sawyer Removed the use of intercommunicators
! 98.02.23 Sawyer Added ghosting utilities
! 98.02.25 Sawyer Modified interface of BeginTransfer
! 98.03.03 Sawyer Added Global ID number to public data members
! 98.03.25 Sawyer Added documentation for walkthrough
! 98.04.16 Sawyer Removed all use of MPI_CART (CommRow redefined)
! 98.07.23 Sawyer Added ParGhost, ParPoleDot; ParBegin/EndGhost out
! 98.09.15 Sawyer Added ParMerge, ParPoleGhost
! 98.09.17 Sawyer Added ParSum, removed ParPoleDot
! 99.01.18 Sawyer Minor cleaning
! 99.03.04 Sawyer Revised SHMEM concept for Transfer
! 99.04.22 Sawyer Removed COMMON for handles -- they are
! always used in same program unit.
! 99.05.21 Sawyer Reintroduced barriers in Scatter/Gather
! 99.06.03 Sawyer USE_SHMEM revisions
! 99.12.10 Sawyer ParInit now sets GID, Gsize
! 99.12.13 Sawyer Version slimmed down for FVCCM release
! 00.06.14 Sawyer Precision module now used
! 00.07.07 Sawyer Removed 2D scatter/gather; simplified API
! 00.07.30 Sawyer Full implementation with shared memory
! 00.08.09 Sawyer Replaced ParSum with ParCollective
! 00.08.28 Sawyer Moved LLNL 2D data to LLNL2DModule; new MLP impl
! 01.02.04 Sawyer Added PatternType and related routines
! 01.02.12 Sawyer Converted to free format
! 02.10.30 Sawyer Welded with mod_comm
! 03.03.06 Sawyer Fix parpatterncreate for MPI2; use MPI_DATATYPE_NULL
! 05.10.12 Worley Support for vectorization modifications in mod_comm
! 06.03.01 Sawyer Merged CAM and GEOS5 versions
! 07.01.05 Mirin Eliminated direct use of Gsize
! 07.09.04 Dennis Reduced temporary memory usage
!
! !BUGS:
! There are several MPI_Barriers at locations in the code.
! These avoid potential race conditions which probably only occur
! if the number of real processors is less than the number of
! message passing processes. Remove these barriers at your own risk
!
!EOP
! Module state: MPI request handles for the (possibly overlapped)
! asynchronous transfers managed by ParBeginTransfer/ParEndTransfer.
INTEGER, SAVE :: InHandle(MAX_PAX, MAX_SMP, MAX_TRF)
INTEGER, SAVE :: OutHandle(MAX_PAX,MAX_SMP, MAX_TRF)
INTEGER, SAVE :: BegTrf = 0 ! Ongoing overlapped begintransfer #
INTEGER, SAVE :: EndTrf = 0 ! Ongoing overlapped endtransfer #
LOGICAL, SAVE :: Initialized = .FALSE. ! Flag for initialization of parutilitiesmodule.
CONTAINS
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParInit --- Initialize the parallel execution
!
! !INTERFACE:
SUBROUTINE ParInit( Comm, npryzxy, mod_method, mod_geopk,              &
                    mod_gatscat, mod_maxirr )
!
! !USES:
   USE mod_comm, ONLY : mp_init
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, OPTIONAL             :: Comm         ! Main communicator
   INTEGER, OPTIONAL, INTENT(IN) :: npryzxy(4)   ! 2D decompositions
   INTEGER, OPTIONAL, INTENT(IN) :: mod_method   ! CAM optimization
   INTEGER, OPTIONAL, INTENT(IN) :: mod_geopk    ! CAM optimization
   INTEGER, OPTIONAL, INTENT(IN) :: mod_gatscat  ! CAM optimization
   INTEGER, OPTIONAL, INTENT(IN) :: mod_maxirr   ! CAM max simul. trsps.
!
! !DESCRIPTION:
!   Initializes the system. In MPI mode, call MPI\_INIT if not done
!   already. If the optional arguments are not provided, default
!   values will be chosen. But it is advisable to provide COMM
!   (main communicator) and NPRYZXY (internal 2D decomposition).
!
! !SYSTEM ROUTINES:
!   MPI_INITIALIZED, MPI_INIT
!
! !REVISION HISTORY:
!   97.03.20   Sawyer     Creation
!   97.04.16   Sawyer     Cleaned up for walk-through
!   97.07.03   Sawyer     Reformulated documentation
!   00.07.23   Sawyer     Added shared memory arena implementation
!   02.10.30   Sawyer     Now uses mp_init from mod_comm
!   06.06.15   Sawyer     Added CAM optimizations (passed to mod_comm)
!
!EOP
!-----------------------------------------------------------------------
!BOC
   ! Initialize mod_comm exactly once; subsequent calls are no-ops.
   IF ( .NOT. Initialized ) THEN
      CALL mp_init( Comm, npryzxy, mod_method, mod_geopk,              &
                    mod_gatscat, mod_maxirr )
      Gsize = numpro     ! Support PILGRIM's Gsize for now
      Initialized = .TRUE.
   ENDIF
   RETURN
!EOC
END SUBROUTINE ParInit
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParExit --- Finalize the parallel execution
!
! !INTERFACE:
SUBROUTINE ParExit( Comm )
! !USES:
   USE mod_comm, ONLY: mp_exit
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, OPTIONAL :: Comm        ! Main communicator
! !DESCRIPTION:
!   All PEs, compute nodes and IO nodes alike meet here to terminate
!   themselves. If someone does not check in, everything will hang
!   here.
!
!   This routine is the very {\em last} thing which is executed!
!
! !SYSTEM ROUTINES:
!   MPI_BARRIER, MPI_FINALIZE
!
! !REVISION HISTORY:
!   97.03.20   Sawyer     Creation
!   97.04.16   Sawyer     Cleaned up for walk-through
!   97.07.03   Sawyer     Reformulated documentation
!   00.07.23   Sawyer     Added shared memory arena implementation
!   02.08.13   Sawyer     Incorporated mod_comm for low level comm.
!
!EOP
!-----------------------------------------------------------------------
!BOC
   ! mp_exit performs the collective shutdown (barrier + finalize).
   CALL mp_exit( Comm )
   RETURN
!EOC
END SUBROUTINE ParExit
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParSplit --- Split into group for I/O and computation
!
! !INTERFACE:
SUBROUTINE ParSplit( InComm, Color, InID, Comm, MyID, Nprocs )
!
! !USES:
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )  :: InComm    ! Communicator to split
   INTEGER, INTENT( IN )  :: Color     ! Group label
   INTEGER, INTENT( IN )  :: InID      ! Input ID (rank key)
! !OUTPUT PARAMETERS:
   INTEGER, INTENT( OUT ) :: Comm      ! Split communicator
   INTEGER, INTENT( OUT ) :: MyID      ! Rank in the split communicator
   INTEGER, INTENT( OUT ) :: Nprocs    ! Number of PEs in my group
!
! !DESCRIPTION:
!   This routine splits the PEs into groups. This is currently only
!   supported in MPI mode. Read the chapter on MPI\_COMM\_SPLIT
!   thoroughly.
!
! !SYSTEM ROUTINES:
!   MPI_COMM_SPLIT, MPI_COMM_SIZE, MPI_COMM_RANK
!
! !REVISION HISTORY:
!   97.03.20   Sawyer     Creation
!   97.04.16   Sawyer     Cleaned up for walk-through
!   97.07.03   Sawyer     Reformulated documentation
!   97.12.01   Sawyer     Xnodes and Ynodes are explicit arguments
!   97.12.23   Lucchesi   Added call to MPI_INTERCOMM_CREATE
!   98.01.06   Sawyer     Additions from RL for I/O Nodes
!   98.02.02   Sawyer     Added the Cartesian information
!   98.02.05   Sawyer     Removed the use of intercommunicators
!   98.04.16   Sawyer     Removed all use of MPI_CART (CommRow redefined)
!   99.01.10   Sawyer     CommRow now defined for all rows
!   00.07.09   Sawyer     Removed 2D computational mesh
!   00.08.08   Sawyer     Redefined as wrapper to mpi_comm_split
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER Ierror

   CPP_ENTER_PROCEDURE( "PARSPLIT" )
!
! Split the communicators
!
   CALL MPI_COMM_SPLIT( InComm, Color, InID, Comm, Ierror )
   IF ( Comm .ne. MPI_COMM_NULL ) THEN
      CALL MPI_COMM_RANK( Comm, MyID, Ierror )
      CALL MPI_COMM_SIZE( Comm, Nprocs, Ierror )
   ELSE
!
! This PE does not participate: mark with impossible values
!
      MyID   = -1
      Nprocs = -1
   ENDIF

   CPP_LEAVE_PROCEDURE( "PARSPLIT" )
   RETURN
!EOC
END SUBROUTINE ParSplit
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParFree --- Free a communicator
!
! !INTERFACE:
SUBROUTINE ParFree( InComm )
!
! !USES:
   IMPLICIT NONE
! !INPUT PARAMETERS:
   ! INOUT: MPI_COMM_FREE sets the handle to MPI_COMM_NULL on return
   INTEGER, INTENT( INOUT ) :: InComm
!
! !DESCRIPTION:
!   This routine frees a communicator created with ParSplit
!
! !REVISION HISTORY:
!   97.09.11   Sawyer     Creation, to complement ParSplit
!   00.07.24   Sawyer     Revamped ParMerge into a free communicator
!
! !LOCAL VARIABLES:
   INTEGER Ierror
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARFREE" )
!
   CALL MPI_COMM_FREE( InComm, Ierror )

   CPP_LEAVE_PROCEDURE( "PARFREE" )
   RETURN
!EOC
END SUBROUTINE ParFree
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParPatternCopy --- Duplicate/replicate a comm pattern
!
! !INTERFACE:
SUBROUTINE ParPatternCopy( InComm, PatternIn, PatternOut, Multiplicity )
!
! !USES:
   USE mod_comm, ONLY : get_partneroffset
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm      ! Communicator
   TYPE(ParPatternType), INTENT( IN )  :: PatternIn   ! Comm Pattern
   INTEGER, INTENT( IN ), OPTIONAL     :: Multiplicity
! !OUTPUT PARAMETERS:
   TYPE(ParPatternType), INTENT( OUT ) :: PatternOut  ! Comm Pattern
!
! !DESCRIPTION:
!   This routine duplicates a given communication pattern.
!
!   Optionally a multiplicity can be added. This replicates the
!   communication pattern Mult times, that is for the case that
!   the data structures are replicated in the final dimension
!   Mult times. A typical example is a pattern describing a 2D
!   array, e.g. a lat-lon decomposition, which will be used
!   to copy a 3D lat-lon-lev array. The strides (e.g. the number
!   of elements in one plane) of the source (send) and target
!   (recv) arrays are now calculated internally.
!
! !SYSTEM ROUTINES:
!   MPI_TYPE_UB, MPI_TYPE_HVECTOR, MPI_TYPE_COMMIT
!
! !REVISION HISTORY:
!   03.03.20   Sawyer     Creation
!   03.06.26   Sawyer     Removed StrideSend/Recv from API
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER Stride_S, Stride_R, Mult, Iam, GroupSize, Ipe, Ierror
   INTEGER Disp, Length, I, J, ub, method

   CPP_ENTER_PROCEDURE( "PARPATTERNCOPY" )

   method = PatternIn%RecvDesc(1)%method
!
! Decide if this is a simple copy, or a multiple replication
!
   IF ( present(Multiplicity) ) THEN
      Mult = Multiplicity
   ELSE
      Mult = 1
   ENDIF

   CALL MPI_COMM_DUP( PatternIn%Comm, PatternOut%Comm, Ierror )
   CALL MPI_COMM_SIZE( PatternIn%Comm, GroupSize, Ierror )
   CALL MPI_COMM_RANK( PatternIn%Comm, Iam, Ierror )
   PatternOut%Iam  = Iam
   PatternOut%Size = GroupSize

   ALLOCATE( PatternOut%SendDesc( GroupSize ) )
   ALLOCATE( PatternOut%RecvDesc( GroupSize ) )
   PatternOut%SendDesc(:)%method = PatternIn%SendDesc(:)%method
   PatternOut%RecvDesc(:)%method = PatternIn%RecvDesc(:)%method
!
! Determine the strides which are by construction the maximum upper
! bound of all the derived types. This is due to the fact that
! there are no 'holes' in the data types: even if one PE does not
! send to any other PEs, it will still have a data type for 'sending'
! data to itself.
!
! NOTE(review): MPI_TYPE_UB was deprecated in MPI-2 and removed in
! MPI-3 (MPI_TYPE_GET_EXTENT is the replacement) -- confirm the MPI
! level supported by the build before modernizing this call.
!
   Stride_S = 0
   Stride_R = 0
   DO Ipe=1, GroupSize
      IF ( PatternIn%SendDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
         CALL MPI_TYPE_UB( PatternIn%SendDesc(Ipe)%type, ub, ierror )
         Stride_S = max(Stride_S,ub)
      ENDIF
      IF ( PatternIn%RecvDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
         CALL MPI_TYPE_UB( PatternIn%RecvDesc(Ipe)%type, ub, ierror )
         Stride_R = max(Stride_R,ub)
      ENDIF
   ENDDO
!
! Determine the output data types: each is the input type replicated
! Mult times with the appropriate stride
!
   DO Ipe=1, GroupSize
      IF ( PatternIn%SendDesc(ipe)%type /= MPI_DATATYPE_NULL ) THEN
         CALL MPI_TYPE_HVECTOR( Mult, 1, Stride_S,                     &
                                PatternIn%SendDesc(Ipe)%type,          &
                                PatternOut%SendDesc(Ipe)%type, Ierror )
         CALL MPI_TYPE_COMMIT( PatternOut%SendDesc(Ipe)%type, Ierror )
      ELSE
         PatternOut%SendDesc(ipe)%type = MPI_DATATYPE_NULL
      ENDIF
      IF ( PatternIn%RecvDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
         CALL MPI_TYPE_HVECTOR( Mult, 1, Stride_R,                     &
                                PatternIn%RecvDesc(Ipe)%type,          &
                                PatternOut%RecvDesc(Ipe)%type, Ierror )
         CALL MPI_TYPE_COMMIT( PatternOut%RecvDesc(Ipe)%type, Ierror )
      ELSE
         PatternOut%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
      ENDIF
   ENDDO
!
! Determine the stride, which is the sum of all the blocksizes for all
! the derived types (there are no 'holes').
!
   Stride_S = 0
   Stride_R = 0
   DO Ipe=1, GroupSize
      Stride_S = Stride_S + sum( PatternIn%SendDesc(ipe)%BlockSizes(:) )
      Stride_R = Stride_R + sum( PatternIn%RecvDesc(ipe)%BlockSizes(:) )
   ENDDO

! Replicate the displacement/blocksize lists Mult times, shifting the
! displacements by the stride for each additional replication
   DO ipe=1, GroupSize
      Length = SIZE(PatternIn%SendDesc(ipe)%BlockSizes)
      ALLOCATE( PatternOut%SendDesc(ipe)%Displacements(Length*Mult) )
      ALLOCATE( PatternOut%SendDesc(ipe)%BlockSizes(Length*Mult) )
#if defined( DEBUG_PARPATTERNCOPY )
      write(iulog,*) "Multiplicity", Mult
      write(iulog,*) "Old send blocksizes", PatternIn%SendDesc(ipe)%BlockSizes
#endif
      DO i=1, Length
         Disp = PatternIn%SendDesc(ipe)%Displacements(i)
         DO j=1, Mult
            PatternOut%SendDesc(ipe)%BlockSizes(i+(j-1)*Length) =      &
               PatternIn%SendDesc(ipe)%BlockSizes(i)
            PatternOut%SendDesc(ipe)%Displacements(i+(j-1)*Length) = Disp
            Disp = Disp + Stride_S
         ENDDO
      ENDDO
      PatternOut%SendDesc(ipe)%Nparcels =                              &
         size (PatternOut%SendDesc(ipe)%Displacements)
      PatternOut%SendDesc(ipe)%Tot_Size =                              &
         sum (PatternOut%SendDesc(ipe)%Blocksizes)
      Max_Nparcels = max (Max_Nparcels, PatternOut%SendDesc(ipe)%Nparcels)
#if defined( DEBUG_PARPATTERNCOPY )
      write(iulog,*) "Send blocksizes", PatternOut%SendDesc(ipe)%BlockSizes
      write(iulog,*) "Old recv blocksizes", PatternIn%RecvDesc(ipe)%BlockSizes
#endif
      Length = SIZE(PatternIn%RecvDesc(ipe)%BlockSizes)
      ALLOCATE( PatternOut%RecvDesc(ipe)%Displacements(Length*Mult) )
      ALLOCATE( PatternOut%RecvDesc(ipe)%BlockSizes(Length*Mult) )
      DO i=1, Length
         Disp = PatternIn%RecvDesc(ipe)%Displacements(i)
         DO j=1, Mult
            PatternOut%RecvDesc(ipe)%BlockSizes(i+(j-1)*Length) =      &
               PatternIn%RecvDesc(ipe)%BlockSizes(i)
            PatternOut%RecvDesc(ipe)%Displacements(i+(j-1)*Length) = Disp
            Disp = Disp + Stride_R
         ENDDO
      ENDDO
      PatternOut%RecvDesc(ipe)%Nparcels =                              &
         size (PatternOut%RecvDesc(ipe)%Displacements)
      PatternOut%RecvDesc(ipe)%Tot_Size =                              &
         sum (PatternOut%RecvDesc(ipe)%Blocksizes)
      Max_Nparcels = max (Max_Nparcels, PatternOut%RecvDesc(ipe)%Nparcels)
#if defined( DEBUG_PARPATTERNCOPY )
      write(iulog,*) "Recv blocksizes", PatternOut%RecvDesc(ipe)%BlockSizes
#endif
   ENDDO

   CALL get_partneroffset( InComm, PatternOut%SendDesc, PatternOut%RecvDesc )

   CPP_LEAVE_PROCEDURE( "PARPATTERNCOPY" )
   RETURN
!EOC
END SUBROUTINE ParPatternCopy
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParPatternGhost --- Create pattern for given ghosting
!
! !INTERFACE:
SUBROUTINE ParPatternGhost( InComm, Ghost, Pattern, mod_method, T )
!
! !USES:
   USE decompmodule, ONLY : DecompGlobalToLocal, DecompLocalToGlobal
   USE ghostmodule, ONLY : GhostType, GhostInfo
   USE mod_comm, ONLY : get_partneroffset
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )             :: InComm     ! Communicator
   TYPE(GhostType), INTENT( IN )     :: Ghost      ! Ghosted decomposition
   INTEGER, INTENT( IN ), OPTIONAL   :: mod_method ! contiguous or derived type
   INTEGER, INTENT( IN ), OPTIONAL   :: T          ! MPI datatype of payload
! !OUTPUT PARAMETERS:
   TYPE(ParPatternType), INTENT( OUT ) :: Pattern  ! Comm Pattern
!
! !DESCRIPTION:
!   This routine contructs a communication pattern from the ghost
!   region definition. That is, the resulting communication pattern
!   can be used in ParBegin/EndTransfer with the ghosted arrays as
!   inputs.
!
! !SYSTEM ROUTINES:
!   MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
!   MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
!
! !REVISION HISTORY:
!   01.02.10   Sawyer     Creation
!   01.06.02   Sawyer     Renamed ParPatternGhost
!   02.06.27   Sawyer     Added data type "T" as optional argument
!   03.03.04   Sawyer     Set partneroffsets field
!   03.11.11   Mirin      Added optional argument mod_method
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER i, j, ipe, pe, Iam, GroupSize, Num, Length, Ptr, Ierror
   INTEGER Global, End, Local, GlobalSize, LocalSize, BorderSize
   INTEGER DataType
   INTEGER, ALLOCATABLE :: InVector(:), OutVector(:)
   INTEGER, ALLOCATABLE :: LenInVector(:), LenOutVector(:)
   INTEGER :: method

   CPP_ENTER_PROCEDURE( "PARPATTERNGHOST" )

   ! Payload datatype defaults to 8-byte real
   IF (present(T)) THEN
      DataType = T
   ELSE
      DataType = CPP_MPI_REAL8
   ENDIF
   IF (present(mod_method)) THEN
      method = mod_method
   ELSE
      method = 0       ! Default method - see mod_comm for description
   ENDIF
!
! First request the needed ghost values from other processors.
!
   CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
   CALL MPI_COMM_RANK( InComm, Iam, Ierror )
   Pattern%Iam  = Iam
   Pattern%Size = GroupSize

   ALLOCATE( Pattern%SendDesc( GroupSize ) )
   ALLOCATE( Pattern%RecvDesc( GroupSize ) )
   Pattern%SendDesc(:)%method = method
   Pattern%RecvDesc(:)%method = method
!
! Temporary variables
!
   ALLOCATE( LenInVector( GroupSize ) )
   ALLOCATE( LenOutVector( GroupSize ) )
   CALL GhostInfo( Ghost, GroupSize, GlobalSize, LocalSize, BorderSize )
   ALLOCATE( InVector( 2*BorderSize ) )
   ALLOCATE( OutVector( 2*LocalSize ) )
!
! A rather complicated loop to define the local ghost region.
! The concept is the following: go through all the points in the
! border data structure. It contains global indices of the points
! which have to be copied over from neighboring PEs. These indices
! are collected into InVector for transmission to those PEs, in
! effect informing them of the local PEs requirements.
!
! A special case is supported: if the ghost domain wraps around
! onto the domain of the local PE! This is very tricky, because
! the index space in both Ghost%Border and Ghost%Local MUST be
! unique for DecompGlobalToLocal to work. Solution: ghost
! points are marked with the negative value of the needed domain
! value in both Ghost%Border and Ghost%Local. These are "snapped
! over" to the true global index with the ABS function, so that
! they can be subsequently found in the true local domain.
!
   j = 1
   DO ipe=1, GroupSize
      Num = SIZE(Ghost%Border%Head(ipe)%StartTags)
      Length = 0
      DO i = 1, Num
         Global = Ghost%Border%Head(ipe)%StartTags(i)
         IF ( Global /= 0 ) THEN
            Length = Length + 1
            End = Ghost%Border%Head(ipe)%EndTags(i)
            InVector(j)   = ABS(Global)
            InVector(j+1) = ABS(End)
            CALL DecompGlobalToLocal( Ghost%Local, Global, Local, Pe )
            OutVector(Length)     = Local-1        ! Zero-based address
            OutVector(Length+Num) = End - Global+1 ! Parcel size
            j = j + 2
         ENDIF
      ENDDO
      LenInVector(ipe) = 2*Length
!
! Set the receive buffer descriptor
!
#if defined(DEBUG_PARPATTERNGHOST)
      write(iulog,*) "Iam",Iam,"Pe",Ipe-1,"Lens",OutVector(Num+1:Num+Length), &
                     "Displacements", OutVector(1:Length)
#endif
      IF ( Length > 0 .and. method > 0 ) THEN
         CALL MPI_TYPE_INDEXED( Length, OutVector(Num+1), OutVector,   &
                                DataType, Ptr, Ierror )
         CALL MPI_TYPE_COMMIT( Ptr, Ierror )
         Pattern%RecvDesc(ipe)%type = Ptr
      ELSE
         Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
      ENDIF
      ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Length) )
      ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Length) )
      DO i=1, Length
         Pattern%RecvDesc(ipe)%Displacements(i) = OutVector(i)
         Pattern%RecvDesc(ipe)%BlockSizes(i)    = OutVector(Num+i)
      ENDDO
      Pattern%RecvDesc(ipe)%Nparcels =                                 &
         size (Pattern%RecvDesc(ipe)%Displacements)
      Pattern%RecvDesc(ipe)%Tot_Size =                                 &
         sum (Pattern%RecvDesc(ipe)%Blocksizes)
      Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
   ENDDO
!
! Everybody exchanges the needed information
!
#if defined(DEBUG_PARPATTERNGHOST)
   write(iulog,*) "iam", iam, "In", LenInVector, &
                  InVector( 1:SUM(LenInVector) )
#endif
   CALL ParExchangeVectorInt( InComm, LenInVector, InVector,           &
                              LenOutVector, OutVector )
#if defined(DEBUG_PARPATTERNGHOST)
   write(iulog,*) "iam", iam, "Out", LenOutVector, &
                  OutVector( 1:SUM(LenOutVector) )
#endif
!
! Now everyone has the segments which need to be sent to the
! immediate neighbors. Save these in PatternType.
!
   j = 1
   DO ipe = 1, GroupSize
      Num = LenOutVector(ipe) / 2
      DO i = 1, Num
         CALL DecompGlobalToLocal( Ghost%Local, OutVector(j), Local, pe )
         InVector(i)     = Local-1
         InVector(i+Num) = OutVector(j+1) - OutVector(j) + 1
         j = j + 2
      ENDDO
#if defined(DEBUG_PARPATTERNGHOST)
      write(iulog,*) "Iam", Iam, "To", ipe-1, "InVector", &
                     InVector(1:Num), "block size", InVector(Num+1:2*Num)
#endif
      IF ( Num > 0 .and. method > 0 ) THEN
         CALL MPI_TYPE_INDEXED( Num, InVector(Num+1), InVector,        &
                                DataType, Ptr, Ierror )
         CALL MPI_TYPE_COMMIT( Ptr, Ierror )
         Pattern%SendDesc(ipe)%type = Ptr
      ELSE
         Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
      ENDIF
      ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
      ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
      DO i=1, Num
         Pattern%SendDesc(ipe)%Displacements(i) = InVector(i)
         Pattern%SendDesc(ipe)%BlockSizes(i)    = InVector(Num+i)
      ENDDO
      Pattern%SendDesc(ipe)%Nparcels =                                 &
         size (Pattern%SendDesc(ipe)%Displacements)
      Pattern%SendDesc(ipe)%Tot_Size =                                 &
         sum (Pattern%SendDesc(ipe)%Blocksizes)
      Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
   ENDDO

   CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
!
! Clean up the locally allocated variables
!
   DEALLOCATE( OutVector )
   DEALLOCATE( InVector )
   DEALLOCATE( LenOutVector )
   DEALLOCATE( LenInVector )

   CPP_LEAVE_PROCEDURE( "PARPATTERNGHOST" )
   RETURN
!EOC
END SUBROUTINE ParPatternGhost
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParPatternDecompToDecomp --- Create pattern between decomps
!
! !INTERFACE:
SUBROUTINE ParPatternDecompToDecomp( InComm, DA, DB, Pattern, mod_method, T ) 1,9
!
! !USES:
USE decompmodule
, ONLY : DecompType, DecompGlobalToLocal, DecompInfo
USE mod_comm
, ONLY : get_partneroffset
IMPLICIT NONE
! !INPUT PARAMETERS:
INTEGER, INTENT( IN ) :: InComm ! # of PEs
TYPE(DecompType), INTENT( IN ) :: DA ! Source Decomp Desc
TYPE(DecompType), INTENT( IN ) :: DB ! Target Decomp Desc
INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
INTEGER, INTENT( IN ), OPTIONAL :: T !
! !OUTPUT PARAMETERS:
TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
!
! !DESCRIPTION:
! This routine contructs a communication pattern for a
! transformation from one decomposition to another, i.e., a
! so-called "transpose". The resulting communication pattern
! can be used in ParBegin/EndTransfer with the decomposed
! arrays as inputs.
!
! !SYSTEM ROUTINES:
! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
!
! !REVISION HISTORY:
! 01.05.29 Sawyer Creation from RedistributeCreate
! 01.07.13 Sawyer Rewritten to minimize DecompGlobalToLocal
! 02.07.16 Sawyer Added data type T
! 03.11.11 Mirin Added optional argument mod_method
! 07.03.11 Mirin Generalized to different sized decompositions
! 07.09.04 Dennis Reduced amount of temporary memory usage
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
INTEGER I, J, Tag, Local, Pe, LenB, JB, Ipe, Num, Inc, Off
INTEGER Ptr ! Pointer type
INTEGER GroupSize, Iam, Ierror, DataType
INTEGER OldPe, TotalPtsA, NpesA, TotalPtsB, NpesB
INTEGER :: method
INTEGER :: nCount,maxCount,ierr,sz
INTEGER :: lenBjmd,nNeigh,maxLenB,maxNeigh
#ifdef _SMEMORY
TYPE (ParInfoType) :: Info
#endif
INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
INTEGER, ALLOCATABLE :: LocalA(:) ! Generic Local indices
INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
INTEGER, ALLOCATABLE :: LocalB(:) ! Local indices for B
INTEGER, ALLOCATABLE :: PeB(:) ! Processor element numbers
CPP_ENTER_PROCEDURE( "PARPATTERNDECOMPTODECOMP" )
IF (present(T)) THEN
DataType = T
ELSE
DataType = CPP_MPI_REAL8
ENDIF
IF (present(mod_method)) THEN
method = mod_method
ELSE
method = 0 ! Default method - see mod_comm for description
ENDIF
! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
CALL DecompInfo
( DA, NpesA, TotalPtsA )
CALL DecompInfo
( DB, NpesB, TotalPtsB )
CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
CALL MPI_COMM_RANK( InComm, Iam, Ierror )
CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
#ifdef _SMEMORY
! Calculate info about the pattern
call ParCalcInfo
(InComm,DA,DB, Info)
TotalPtsA=Info%maxNumSeg
TotalPtsB=Info%maxNumSeg
#endif
Pattern%Size = GroupSize
Pattern%Iam = Iam
!
! Allocate the number of entries and list head arrays
!
!
! Allocate the patterns
!
ALLOCATE( Pattern%SendDesc( NpesB ) )
Pattern%SendDesc(:)%method = method
if (iam .ge. NpesA) then
do ipe = 1, NpesB
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
Pattern%SendDesc(ipe)%Tot_Size = -1
Pattern%SendDesc(ipe)%Nparcels = -1
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%SendDesc(ipe)%Displacements(1) = -1
Pattern%SendDesc(ipe)%Blocksizes(1) = -1
enddo
endif
ALLOCATE( Pattern%RecvDesc( NpesA ) )
Pattern%RecvDesc(:)%method = method
if (iam .ge. NpesB) then
do ipe = 1, NpesA
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
Pattern%RecvDesc(ipe)%Tot_Size = -1
Pattern%RecvDesc(ipe)%Nparcels = -1
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%RecvDesc(ipe)%Displacements(1) = -1
Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
enddo
endif
!
! Local allocations
!
ALLOCATE( DisplacementsA( TotalPtsA ) ) ! Allocate for worst case
ALLOCATE( BlockSizesA( TotalPtsA ) ) ! Allocate for worst case
ALLOCATE( LocalA( TotalPtsA ) ) ! Allocate for worst case
ALLOCATE( DisplacementsB( TotalPtsB ) ) ! Allocate for worst case
ALLOCATE( BlockSizesB( TotalPtsB ) ) ! Allocate for worst case
ALLOCATE( LocalB( TotalPtsB ) ) ! Allocate for worst case
ALLOCATE( PeB( TotalPtsB ) ) ! Allocate for worst case
ALLOCATE( Count( GroupSize ) )
ALLOCATE( CountOut( GroupSize ) )
JB = 0
Count = 0
LenB = 0
LocalA = 0 ! (needed for parexchangevector later)
BlocksizesA = 0 ! (needed for parexchangevector later)
Num = 0
Inc = 0
if (iam .lt. NpesB) then
!
! Parse through all the tags in the local segment
DO J = 1, SIZE( DB%Head(iam+1)%StartTags )
OldPe = -1 ! Set PE undefined
DO Tag=DB%Head(iam+1)%StartTags(J), DB%Head(iam+1)%EndTags(J)
!
! Determine the index and PE of this entry on A. This might be inlined later
!
CALL DecompGlobalToLocal
( DA, Tag, Local, Pe )
!
! If ipe-1 is my id, then this is an entry ipe will receive from Pe
!
IF ( Pe /= OldPe ) THEN
OldPe = Pe
IF ( jb > 0 ) THEN
BlockSizesB(jb) = LenB
LenB = 0
ENDIF
jb = jb+1 ! increment the segment index
DisplacementsB(jb) = Inc ! Zero-based offset of local segment
LocalB(jb) = Local-1 ! The local index (zero-based)
PeB(jb) = Pe ! Note the ID of the sender
Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
ENDIF
LenB = LenB+1 ! Good -- segment is getting longer
Inc = Inc+1 ! Increment local index
ENDDO
ENDDO
!
! Clean up
!
IF ( jb>0 ) BlockSizesB(jb) = LenB
#if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
write(iulog,*) iam, "BlockSizes", BlockSizesB(1:jb), DisplacementsB(1:jb), PeB(1:jb), Count
#endif
CPP_ASSERT_F90( JB .LE. TotalPtsB )
!
! Now create the pattern from the displacements and block sizes
!
Inc = 0
DO ipe = 1, NpesA
!
! Find the segments which are relevant for the sender ipe
! Make compact arrays BlockSizes and Displacements
!
DO j = 1, jb
IF ( PeB(j) == ipe-1 ) THEN
Inc = Inc + 1
BlockSizesA(Inc) = BlockSizesB(j)
DisplacementsA(Inc) = DisplacementsB(j)
LocalA(Inc) = LocalB(j)
ENDIF
ENDDO
ENDDO
CPP_ASSERT_F90( Inc .LE. TotalPtsA )
!
! Create the receiver communication pattern
!
Off = 0
DO ipe = 1, NpesA
Num = Count(ipe)
if(Num >0) then
#if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsA(Off+1:Off+Num), &
"BlockSizes", BlockSizesA(Off+1:Off+Num)
#endif
endif
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
DisplacementsA(Off+1), &
DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%RecvDesc(ipe)%type = Ptr
ELSE
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
ENDDO
Pattern%RecvDesc(ipe)%Nparcels = &
size (Pattern%RecvDesc(ipe)%Displacements)
Pattern%RecvDesc(ipe)%Tot_Size = &
sum (Pattern%RecvDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesB)
!
! Now communicate what the receiver is expecting from the sender
!
CALL ParExchangeVectorInt
( InComm, Count, LocalA, &
CountOut, DisplacementsB )
CALL ParExchangeVectorInt
( InComm, Count, BlockSizesA, &
CountOut, BlockSizesB )
!
! Sender A: BlockSizes and Displacements can now be stored
!
if (iam .lt. NpesA) then
Off = 0
DO ipe=1, NpesB
Num = CountOut(ipe)
if(Num>0) then
#if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsB(Off+1:Off+Num), &
"BlockSizes", BlockSizesB(Off+1:Off+Num)
#endif
endif
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
DisplacementsB(Off+1), &
DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%SendDesc(ipe)%type = Ptr
ELSE
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
ENDDO
Pattern%SendDesc(ipe)%Nparcels = &
size (Pattern%SendDesc(ipe)%Displacements)
Pattern%SendDesc(ipe)%Tot_Size = &
sum (Pattern%SendDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesA)
CALL get_partneroffset
( InComm, Pattern%SendDesc, Pattern%RecvDesc )
DEALLOCATE( CountOut )
DEALLOCATE( Count )
DEALLOCATE( PeB )
DEALLOCATE( LocalB )
DEALLOCATE( BlockSizesB )
DEALLOCATE( DisplacementsB )
DEALLOCATE( LocalA )
DEALLOCATE( BlockSizesA )
DEALLOCATE( DisplacementsA )
CPP_LEAVE_PROCEDURE( "PARPATTERNDECOMPTODECOMP" )
RETURN
!EOC
END SUBROUTINE ParPatternDecompToDecomp
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParPatternDecompToGhost --- Create pattern decomp to ghost
!
! !INTERFACE:
SUBROUTINE ParPatternDecompToGhost( InComm, DA, GB, Pattern, mod_method, T ) 1,10
!
! !USES:
USE decompmodule
, ONLY : DecompType, DecompGlobalToLocal, &
DecompInfo
USE ghostmodule
, ONLY : GhostType, GhostInfo
USE mod_comm
, ONLY : get_partneroffset
IMPLICIT NONE
! !INPUT PARAMETERS:
INTEGER, INTENT( IN ) :: InComm ! # of PEs
TYPE(DecompType), INTENT( IN ) :: DA ! Source Ghost Desc
TYPE(GhostType), INTENT( IN ) :: GB ! Target Ghost Desc
INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
INTEGER, INTENT( IN ), OPTIONAL :: T !
! !OUTPUT PARAMETERS:
TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
!
! !DESCRIPTION:
! This routine contructs a communication pattern for a transformation
! from decomposition to a ghosted decomposition, i.e., a so-called
! "transpose". The resulting communication pattern can be used in
! ParBegin/EndTransfer with the decomposed arrays as inputs.
!
! !SYSTEM ROUTINES:
! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
!
! !REVISION HISTORY:
! 01.07.12 Sawyer Creation from ParPatternDecompToDecomp
! 02.03.20 Sawyer Bug fix: added OldLocal, increment Off
! 02.07.16 Sawyer Added data type T
! 03.11.11 Mirin Added optional argument mod_method
! 07.03.11 Mirin Generalized to different sized decompositions
! 07.09.04 Dennis Reduced amount of temporary memory usage
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
INTEGER I, J, Tag, Local, Pe, LenB, JB, Ipe, Num, Inc, Off
INTEGER Ptr ! Pointer type
INTEGER GroupSize, Iam, Ierror
INTEGER OldPe, OldLocal, TotalPtsA, NpesA
INTEGER GlobalSizeB, LocalSizeB, BorderSizeB, NpesB
INTEGER DataType
INTEGER :: method
INTEGER :: nCount, maxCount, ierr
#ifdef _SMEMORY
TYPE (ParInfoType) :: Info
#endif
INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
INTEGER, ALLOCATABLE :: LocalA(:) ! Generic Local indices
INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
INTEGER, ALLOCATABLE :: LocalB(:) ! Local indices for B
INTEGER, ALLOCATABLE :: PeB(:) ! Processor element numbers
CPP_ENTER_PROCEDURE( "PARPATTERNDECOMPTOGHOST" )
IF (present(T)) THEN
DataType = T
ELSE
DataType = CPP_MPI_REAL8
ENDIF
IF (present(mod_method)) THEN
method = mod_method
ELSE
method = 0 ! Default method - see mod_comm for description
ENDIF
! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
CALL DecompInfo
( DA, NpesA, TotalPtsA )
CALL GhostInfo
( GB, NpesB, GlobalSizeB, LocalSizeB, BorderSizeB )
CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
CALL MPI_COMM_RANK( InComm, Iam, Ierror )
CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
#ifdef _SMEMORY
! Calculate info about the pattern
call ParCalcInfo
(InComm,DA,GB, Info)
TotalPtsA=Info%maxNumSeg
GlobalSizeB=Info%maxNumSeg
#endif
Pattern%Size = GroupSize
Pattern%Iam = Iam
!
! Allocate the number of entries and list head arrays
!
!
! Allocate the patterns
!
ALLOCATE( Pattern%SendDesc( NpesB ) )
Pattern%SendDesc(:)%method = method
if (iam .ge. NpesA) then
do ipe = 1, NpesB
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
Pattern%SendDesc(ipe)%Tot_Size = -1
Pattern%SendDesc(ipe)%Nparcels = -1
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%SendDesc(ipe)%Displacements(1) = -1
Pattern%SendDesc(ipe)%Blocksizes(1) = -1
enddo
endif
ALLOCATE( Pattern%RecvDesc( NpesA ) )
Pattern%RecvDesc(:)%method = method
if (iam .ge. NpesB) then
do ipe = 1, NpesA
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
Pattern%RecvDesc(ipe)%Tot_Size = -1
Pattern%RecvDesc(ipe)%Nparcels = -1
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%RecvDesc(ipe)%Displacements(1) = -1
Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
enddo
endif
!
! Local allocations
!
ALLOCATE( DisplacementsA( TotalPtsA ) ) ! Allocate for worst case
ALLOCATE( BlockSizesA( TotalPtsA ) ) ! Allocate for worst case
ALLOCATE( LocalA( TotalPtsA ) ) ! Allocate for worst case
ALLOCATE( DisplacementsB( GlobalSizeB ) ) ! Allocate for worst case
ALLOCATE( BlockSizesB( GlobalSizeB ) ) ! Allocate for worst case
ALLOCATE( LocalB( GlobalSizeB ) ) ! Allocate for worst case
ALLOCATE( PeB( GlobalSizeB ) ) ! Allocate for worst case
ALLOCATE( Count( GroupSize ) )
ALLOCATE( CountOut( GroupSize ) )
JB = 0
Count = 0
LenB = 0
LocalA = 0 ! (needed for parexchangevector later)
BlocksizesA = 0 ! (needed for parexchangevector later)
Num = 0
Inc = 0
if (iam .lt. NpesB) then
!
! Parse through all the tags in the local segment
DO J = 1, SIZE( GB%Local%Head(iam+1)%StartTags )
OldPe = -1 ! Set PE undefined
OldLocal = 0 ! Set local index undefined
DO Tag=GB%Local%Head(iam+1)%StartTags(J), &
GB%Local%Head(iam+1)%EndTags(J)
IF ( Tag > 0 ) THEN ! Active point
!
! Determine the index and PE of this entry on A. This might be inlined later
!
CALL DecompGlobalToLocal
( DA, Tag, Local, Pe )
!
! If ipe-1 is my id, then this is an entry ipe will receive from Pe
!
IF ( Pe /= OldPe .OR. Local /= OldLocal+1 ) THEN
IF ( jb > 0 ) THEN
BlockSizesB(jb) = LenB
LenB = 0
ENDIF
jb = jb+1 ! increment the segment index
DisplacementsB(jb) = Inc ! Zero-based offset of local segment
LocalB(jb) = Local-1 ! Local indices (zero-based)
PeB(jb) = Pe ! Note the ID of the sender
Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
ENDIF
OldPe = Pe ! Update PE
OldLocal= Local ! Update local index
LenB = LenB+1 ! Good -- segment is getting longer
ENDIF
Inc = Inc+1 ! Increment local index
ENDDO
ENDDO
!
! Clean up
!
IF ( jb>0 ) BlockSizesB(jb) = LenB
CPP_ASSERT_F90( JB .LE. GlobalSize )
!
! Now create the pattern from the displacements and block sizes
!
Inc = 0
DO ipe = 1, NpesA
!
! Find the segments which are relevant for the sender ipe
! Make compact arrays BlockSizes and Displacements
!
DO j = 1, jb
IF ( PeB(j) == ipe-1 ) THEN
Inc = Inc + 1
BlockSizesA(Inc) = BlockSizesB(j)
DisplacementsA(Inc) = DisplacementsB(j)
LocalA(Inc) = LocalB(j)
ENDIF
ENDDO
ENDDO
CPP_ASSERT_F90( Inc .LE. TotalPtsA )
Off = 0
DO ipe = 1, NpesA
Num = Count(ipe)
#if defined( DEBUG_PARPATTERNDECOMPTOGHOST )
write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsA(Off+1:Off+Num), &
"BlockSizes", BlockSizesA(Off+1:Off+Num)
#endif
!
! Create the receiver communication pattern
!
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
DisplacementsA(Off+1), DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%RecvDesc(ipe)%type = Ptr
ELSE
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
ENDDO
Pattern%RecvDesc(ipe)%Nparcels = &
size (Pattern%RecvDesc(ipe)%Displacements)
Pattern%RecvDesc(ipe)%Tot_Size = &
sum (Pattern%RecvDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesB)
!
! Now communicate what the receiver is expecting to the sender
!
CALL ParExchangeVectorInt
( InComm, Count, LocalA, &
CountOut, DisplacementsB )
CALL ParExchangeVectorInt
( InComm, Count, BlockSizesA, &
CountOut, BlockSizesB )
!
! Sender A: BlockSizes and Displacements can now be stored
!
if (iam .lt. NpesA) then
Off = 0
DO ipe=1, NpesB
Num = CountOut(ipe)
#if defined( DEBUG_PARPATTERNDECOMPTOGHOST )
write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsB(Off+1:Off+Num), &
"BlockSizes", BlockSizesB(Off+1:Off+Num)
#endif
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
DisplacementsB(Off+1), DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%SendDesc(ipe)%type = Ptr
ELSE
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
ENDDO
Pattern%SendDesc(ipe)%Nparcels = &
size (Pattern%SendDesc(ipe)%Displacements)
Pattern%SendDesc(ipe)%Tot_Size = &
sum (Pattern%SendDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesA)
CALL get_partneroffset
( InComm, Pattern%SendDesc, Pattern%RecvDesc )
DEALLOCATE( CountOut )
DEALLOCATE( Count )
DEALLOCATE( PeB )
DEALLOCATE( LocalB )
DEALLOCATE( BlockSizesB )
DEALLOCATE( DisplacementsB )
DEALLOCATE( LocalA )
DEALLOCATE( BlockSizesA )
DEALLOCATE( DisplacementsA )
CPP_LEAVE_PROCEDURE( "PARPATTERNDECOMPTOGHOST" )
RETURN
!EOC
END SUBROUTINE ParPatternDecompToGhost
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParPatternGhostToDecomp --- Create pattern between decomps
!
! !INTERFACE:
SUBROUTINE ParPatternGhostToDecomp( InComm, GA, DB, Pattern, mod_method, T ) 1,11
!
! !USES:
USE decompmodule
, ONLY : DecompType, DecompGlobalToLocal, DecompInfo
USE ghostmodule
, ONLY : GhostType, GhostInfo
USE mod_comm
, ONLY : get_partneroffset
IMPLICIT NONE
! !INPUT PARAMETERS:
INTEGER, INTENT( IN ) :: InComm ! # of PEs
TYPE(GhostType), INTENT( IN ) :: GA ! Source Decomp Desc
TYPE(DecompType), INTENT( IN ) :: DB ! Target Decomp Desc
INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
INTEGER, INTENT( IN ), OPTIONAL :: T !
! !OUTPUT PARAMETERS:
TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
!
! !DESCRIPTION:
! This routine contructs a communication pattern for a
! transformation from one ghosted decomposition to partitioned
! one, i.e., a so-called "transpose". The resulting communication
! pattern can be used in ParBegin/EndTransfer with the decomposed
! arrays as inputs.
!
! !SYSTEM ROUTINES:
! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
!
! !REVISION HISTORY:
! 02.01.10 Sawyer Creation from DecompToDecomp
! 02.07.16 Sawyer Added data type T
! 03.11.11 Mirin Added optional argument mod_method
! 07.03.11 Mirin Generalized to different sized decompositions
! 07.09.04 Dennis Reduced amount of temporary memory usage
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
INTEGER I, J, Tag, Local, Pe, Len, JA, Ipe, Num, Inc, Off
INTEGER NpesA, GlobalSizeA, LocalSizeA, BorderSizeA
INTEGER OldPe, OldLocal, TotalPtsB, NpesB
INTEGER GroupSize, Iam, Ierror
INTEGER Ptr ! Pointer type
INTEGER DataType
INTEGER :: method
INTEGER :: nCount, maxCount, ierr
#ifdef _SMEMORY
TYPE (ParInfoType) :: Info
#endif
INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
INTEGER, ALLOCATABLE :: GlobalA(:) ! Generic Local indices
INTEGER, ALLOCATABLE :: PeA(:) ! Processor element numbers
INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
INTEGER, ALLOCATABLE :: GlobalB(:) ! Global indices for B
CPP_ENTER_PROCEDURE( "PARPATTERNGHOSTTODECOMP" )
IF (present(T)) THEN
DataType = T
ELSE
DataType = CPP_MPI_REAL8
ENDIF
IF (present(mod_method)) THEN
method = mod_method
ELSE
method = 0 ! Default method - see mod_comm for description
ENDIF
! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
CALL GhostInfo
( GA, NpesA, GlobalSizeA, LocalSizeA, BorderSizeA )
CALL DecompInfo
( DB, NpesB, TotalPtsB )
CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
CALL MPI_COMM_RANK( InComm, Iam, Ierror )
CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
#ifdef _SMEMORY
! Calculate info about the pattern
call ParCalcInfo
(InComm,GA,DB, Info)
GlobalSizeA=Info%maxNumSeg
TotalPtsB=Info%maxNumSeg
#endif
Pattern%Size = GroupSize
Pattern%Iam = Iam
!
! Allocate the number of entries and list head arrays
!
!
! Allocate the patterns
!
ALLOCATE( Pattern%SendDesc( NpesB ) )
Pattern%SendDesc(:)%method = method
if (iam .ge. NpesA) then
do ipe = 1, NpesB
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
Pattern%SendDesc(ipe)%Tot_Size = -1
Pattern%SendDesc(ipe)%Nparcels = -1
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%SendDesc(ipe)%Displacements(1) = -1
Pattern%SendDesc(ipe)%Blocksizes(1) = -1
enddo
endif
ALLOCATE( Pattern%RecvDesc( NpesA ) )
Pattern%RecvDesc(:)%method = method
if (iam .ge. NpesB) then
do ipe = 1, NpesA
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
Pattern%RecvDesc(ipe)%Tot_Size = -1
Pattern%RecvDesc(ipe)%Nparcels = -1
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%RecvDesc(ipe)%Displacements(1) = -1
Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
enddo
endif
!
! Local allocations
!
ALLOCATE( DisplacementsA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( BlockSizesA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( GlobalA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( PeA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( DisplacementsB( TotalPtsB ) ) ! Allocate for worst case
ALLOCATE( BlockSizesB( TotalPtsB ) ) ! Allocate for worst case
ALLOCATE( GlobalB( TotalPtsB ) ) ! Allocate for worst case
ALLOCATE( Count( GroupSize ) )
ALLOCATE( CountOut( GroupSize ) )
JA = 0
Count = 0
Len = 0
GlobalB = 0 ! (needed for parexchangevector later)
BlockSizesB = 0 ! (needed for parexchangevector later)
Num = 0
Inc = 0
if (iam .lt. NpesB) then
!
! Parse through all the tags in the local segment
DO J = 1, SIZE( DB%Head(iam+1)%StartTags )
OldPe = -1 ! Set PE undefined
OldLocal = 0 ! Set index value undefined
DO Tag=DB%Head(iam+1)%StartTags(J), DB%Head(iam+1)%EndTags(J)
!
! Determine the index and PE of this entry on A. This might be inlined later
!
CALL DecompGlobalToLocal
( GA%Decomp, Tag, Local, Pe )
!
! If ipe-1 is my id, then this is an entry ipe will receive from Pe
!
IF ( Pe /= OldPe .OR. Local /= OldLocal+1 ) THEN
IF ( ja > 0 ) THEN
BlockSizesA(ja) = Len
Len = 0
ENDIF
ja = ja+1 ! increment the segment index
DisplacementsA(ja) = Inc ! Zero-based offset of local segment
GlobalA(ja) = Tag ! The global tag of the desired datum
PeA(ja) = Pe ! Note the ID of the sender
Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
ENDIF
OldPe = Pe ! Update old PE
OldLocal = Local ! Update old local index
Len = Len+1 ! Good -- segment is getting longer
Inc = Inc+1 ! Increment local index
ENDDO
ENDDO
!
! Clean up
!
BlockSizesA(ja) = Len
CPP_ASSERT_F90( JA .LE. GlobalSizeA )
!
! Now create the pattern from the displacements and block sizes
!
Inc = 0
DO ipe = 1, NpesA
!
! Find the segments which are relevant for the sender ipe
! Make compact arrays BlockSizes and Displacements
!
DO j = 1, ja
IF ( PeA(j) == ipe-1 ) THEN
Inc = Inc + 1
BlockSizesB(Inc) = BlockSizesA(j)
DisplacementsB(Inc) = DisplacementsA(j)
GlobalB(Inc) = GlobalA(j)
ENDIF
ENDDO
ENDDO
CPP_ASSERT_F90(Inc .LE. TotalPtsB)
!
! Create the receiver communication pattern
!
Off = 0
DO ipe = 1, NpesA
Num = Count(ipe)
#if defined( DEBUG_PARPATTERNGHOSTTODECOMP )
write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsB(Off+1:Off+Num), &
"BlockSizes", BlockSizesB(Off+1:Off+Num)
#endif
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
DisplacementsB(Off+1), DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%RecvDesc(ipe)%type = Ptr
ELSE
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
ENDDO
Pattern%RecvDesc(ipe)%Nparcels = &
size (Pattern%RecvDesc(ipe)%Displacements)
Pattern%RecvDesc(ipe)%Tot_Size = &
sum (Pattern%RecvDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesB)
!
! Now communicate what the receiver is expecting to the sender
!
CALL ParExchangeVectorInt
( InComm, Count, GlobalB, &
CountOut, GlobalA )
CALL ParExchangeVectorInt
( InComm, Count, BlockSizesB, &
CountOut, BlockSizesA )
if (iam .lt. NpesA) then
!
! Sender A: BlockSizes and Displacements can now be stored
!
Off = 0
DO ipe=1, NpesB
Num = CountOut(ipe)
DO i=1, Num
CALL DecompGlobalToLocal
( GA%Local, GlobalA(i+Off), Local, Pe )
DisplacementsA(i+Off) = Local-1 ! zero-based displacement
ENDDO
#if defined( DEBUG_PARPATTERNGHOSTTODECOMP )
write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsA(Off+1:Off+Num), &
"BlockSizes", BlockSizesA(Off+1:Off+Num)
#endif
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
DisplacementsA(Off+1), DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%SendDesc(ipe)%type = Ptr
ELSE
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
ENDDO
Pattern%SendDesc(ipe)%Nparcels = &
size (Pattern%SendDesc(ipe)%Displacements)
Pattern%SendDesc(ipe)%Tot_Size = &
sum (Pattern%SendDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesA)
CALL get_partneroffset
( InComm, Pattern%SendDesc, Pattern%RecvDesc )
DEALLOCATE( CountOut )
DEALLOCATE( Count )
DEALLOCATE( PeA )
DEALLOCATE( GlobalA )
DEALLOCATE( BlockSizesA )
DEALLOCATE( DisplacementsA )
DEALLOCATE( GlobalB )
DEALLOCATE( BlockSizesB )
DEALLOCATE( DisplacementsB )
CPP_LEAVE_PROCEDURE( "PARPATTERNGHOSTTODECOMP" )
RETURN
!EOC
END SUBROUTINE ParPatternGhostToDecomp
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParPatternGhostToGhost --- Create pattern between decomps
!
! !INTERFACE:
SUBROUTINE ParPatternGhostToGhost( InComm, GA, GB, Pattern, mod_method, T ) 1,11
!
! !USES:
USE decompmodule
, ONLY : DecompGlobalToLocal
USE ghostmodule
, ONLY : GhostType, GhostInfo
USE mod_comm
, ONLY : get_partneroffset
IMPLICIT NONE
! !INPUT PARAMETERS:
INTEGER, INTENT( IN ) :: InComm ! # of PEs
TYPE(GhostType), INTENT( IN ) :: GA ! Source Ghost Decomp
TYPE(GhostType), INTENT( IN ) :: GB ! Target Ghost Decomp
INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
INTEGER, INTENT( IN ), OPTIONAL :: T !
! !OUTPUT PARAMETERS:
TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
!
! !DESCRIPTION:
! This routine contructs a communication pattern for a
! transformation from one ghosted decomposition to partitioned
! one, i.e., a so-called "transpose". The resulting communication
! pattern can be used in ParBegin/EndTransfer with the decomposed
! arrays as inputs.
!
! !SYSTEM ROUTINES:
! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
!
! !REVISION HISTORY:
! 02.01.10 Sawyer Creation from DecompToDecomp
! 02.07.16 Sawyer Added data type T
! 03.11.11 Mirin Added optional argument mod_method
! 07.03.11 Mirin Generalized to different sized decompositions
! 07.09.04 Dennis Reduced amount of temporary memory usage
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
INTEGER I, J, Tag, Local, Pe, Len, JA, Ipe, Num, Inc, Off
INTEGER NpesA, GlobalSizeA, LocalSizeA, BorderSizeA
INTEGER NpesB, GlobalSizeB, LocalSizeB, BorderSizeB
INTEGER GroupSize, Iam, Ierror, OldPe, OldLocal
INTEGER Ptr ! Pointer type
INTEGER DataType
INTEGER :: method
INTEGER :: nCount, maxCount, ierr
#ifdef _SMEMORY
TYPE (ParInfoType) :: Info
#endif
INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
INTEGER, ALLOCATABLE :: GlobalA(:) ! Generic Local indices
INTEGER, ALLOCATABLE :: PeA(:) ! Processor element numbers
INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
INTEGER, ALLOCATABLE :: GlobalB(:) ! Global indices for B
CPP_ENTER_PROCEDURE( "PARPATTERNGHOSTTOGHOST" )
IF (present(T)) THEN
DataType = T
ELSE
DataType = CPP_MPI_REAL8
ENDIF
IF (present(mod_method)) THEN
method = mod_method
ELSE
method = 0 ! Default method - see mod_comm for description
ENDIF
! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
CALL GhostInfo
( GA, NpesA, GlobalSizeA, LocalSizeA, BorderSizeA )
CALL GhostInfo
( GB, NpesB, GlobalSizeB, LocalSizeB, BorderSizeB )
CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
CALL MPI_COMM_RANK( InComm, Iam, Ierror )
CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
#ifdef _SMEMORY
! Calculate info about the pattern
call ParCalcInfo
(InComm,GA,GB, Info)
GlobalSizeA=Info%maxNumSeg
GlobalSizeB=Info%maxNumSeg
#endif
Pattern%Size = GroupSize
Pattern%Iam = Iam
!
! Allocate the number of entries and list head arrays
!
!
! Allocate the patterns
!
ALLOCATE( Pattern%SendDesc( NpesB ) )
Pattern%SendDesc(:)%method = method
if (iam .ge. NpesA) then
do ipe = 1, NpesB
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
Pattern%SendDesc(ipe)%Tot_Size = -1
Pattern%SendDesc(ipe)%Nparcels = -1
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%SendDesc(ipe)%Displacements(1) = -1
Pattern%SendDesc(ipe)%Blocksizes(1) = -1
enddo
endif
ALLOCATE( Pattern%RecvDesc( NpesA ) )
Pattern%RecvDesc(:)%method = method
if (iam .ge. NpesB) then
do ipe = 1, NpesA
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
Pattern%RecvDesc(ipe)%Tot_Size = -1
Pattern%RecvDesc(ipe)%Nparcels = -1
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
Pattern%RecvDesc(ipe)%Displacements(1) = -1
Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
enddo
endif
!
! Local allocations
!
ALLOCATE( DisplacementsA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( BlockSizesA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( GlobalA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( PeA( GlobalSizeA ) ) ! Allocate for worst case
ALLOCATE( DisplacementsB( GlobalSizeB ) ) ! Allocate for worst case
ALLOCATE( BlockSizesB( GlobalSizeB ) ) ! Allocate for worst case
ALLOCATE( GlobalB( GlobalSizeB ) ) ! Allocate for worst case
ALLOCATE( Count( GroupSize ) )
ALLOCATE( CountOut( GroupSize ) )
JA = 0
Count = 0
Len = 0
GlobalB = 0 ! (needed for parexchangevector later)
BlocksizesB = 0 ! (needed for parexchangevector later)
Num = 0
Inc = 0
if (iam .lt. NpesB) then
!
! Parse through all the tags in the local segment
DO J = 1, SIZE( GB%Local%Head(iam+1)%StartTags )
OldPe = -1 ! Set PE undefined
OldLocal = 0 ! Set index value undefined
DO Tag=GB%Local%Head(iam+1)%StartTags(J), GB%Local%Head(iam+1)%EndTags(J)
IF ( Tag > 0 ) THEN ! Active point
!
! Determine the index and PE of this entry on A. This might be inlined later
!
CALL DecompGlobalToLocal
( GA%Decomp, Tag, Local, Pe )
!
! If ipe-1 is my id, then this is an entry ipe will receive from Pe
!
IF ( Pe /= OldPe .OR. Local /= OldLocal+1 ) THEN
IF ( ja > 0 ) THEN
BlockSizesA(ja) = Len
Len = 0
ENDIF
ja = ja+1 ! increment the segment index
DisplacementsA(ja) = Inc ! Zero-based offset of local segment
GlobalA(ja) = Tag ! The global tag of the desired datum
PeA(ja) = Pe ! Note the ID of the sender
Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
ENDIF
OldPe = Pe ! Update old PE
OldLocal = Local ! Update old local index
Len = Len+1 ! Good -- segment is getting longer
ENDIF
Inc = Inc+1 ! Increment local index
ENDDO
ENDDO
!
! Clean up
!
BlockSizesA(ja) = Len
CPP_ASSERT_F90( JA .LE. GlobalSizeA )
!
! Now create the pattern from the displacements and block sizes
!
Inc = 0
DO ipe = 1, NpesA
!
! Find the segments which are relevant for the sender ipe
! Make compact arrays BlockSizes and Displacements
!
DO j = 1, ja
IF ( PeA(j) == ipe-1 ) THEN
Inc = Inc + 1
BlockSizesB(Inc) = BlockSizesA(j)
DisplacementsB(Inc) = DisplacementsA(j)
GlobalB(Inc) = GlobalA(j)
ENDIF
ENDDO
ENDDO
CPP_ASSERT_F90( Inc .LE. GlobalSizeB )
!
! Create the receiver communication pattern
!
Off = 0
DO ipe = 1, NpesA
Num = Count(ipe)
#if defined(DEBUG_PARPATTERNGHOSTTOGHOST)
write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsB(Off+1:Off+Num), &
"BlockSizes", BlockSizesB(Off+1:Off+Num)
#endif
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
DisplacementsB(Off+1), DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%RecvDesc(ipe)%type = Ptr
ELSE
Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
ENDDO
Pattern%RecvDesc(ipe)%Nparcels = &
size (Pattern%RecvDesc(ipe)%Displacements)
Pattern%RecvDesc(ipe)%Tot_Size = &
sum (Pattern%RecvDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesB)
!
! Now communicate what the receiver is expecting to the sender
!
CALL ParExchangeVectorInt
( InComm, Count, GlobalB, &
CountOut, GlobalA )
CALL ParExchangeVectorInt
( InComm, Count, BlockSizesB, &
CountOut, BlockSizesA )
if (iam .lt. NpesA) then
!
! Sender A: BlockSizes and Displacements can now be stored
!
Off = 0
DO ipe=1, NpesB
Num = CountOut(ipe)
DO i=1, Num
CALL DecompGlobalToLocal
( GA%Local, GlobalA(i+Off), Local, Pe )
DisplacementsA(i+Off) = Local-1 ! zero-based displacement
ENDDO
#if defined(DEBUG_PARPATTERNGHOSTTOGHOST)
write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
"Displacements", DisplacementsA(Off+1:Off+Num), &
"BlockSizes", BlockSizesA(Off+1:Off+Num)
#endif
IF ( Num > 0 .and. method > 0 ) THEN
CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
DisplacementsA(Off+1), DataType, Ptr, Ierror )
CALL MPI_TYPE_COMMIT( Ptr, Ierror )
Pattern%SendDesc(ipe)%type = Ptr
ELSE
Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
ENDIF
ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
DO i=1, Num
Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
ENDDO
Pattern%SendDesc(ipe)%Nparcels = &
size (Pattern%SendDesc(ipe)%Displacements)
Pattern%SendDesc(ipe)%Tot_Size = &
sum (Pattern%SendDesc(ipe)%Blocksizes)
Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
Off = Off + Num
ENDDO
endif ! (iam .lt. NpesA)
CALL get_partneroffset
( InComm, Pattern%SendDesc, Pattern%RecvDesc )
DEALLOCATE( CountOut )
DEALLOCATE( Count )
DEALLOCATE( PeA )
DEALLOCATE( GlobalA )
DEALLOCATE( BlockSizesA )
DEALLOCATE( DisplacementsA )
DEALLOCATE( GlobalB )
DEALLOCATE( BlockSizesB )
DEALLOCATE( DisplacementsB )
CPP_LEAVE_PROCEDURE( "PARPATTERNGHOSTTOGHOST" )
RETURN
!EOC
END SUBROUTINE ParPatternGhostToGhost
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParPatternFree --- Free the communication pattern
!
! !INTERFACE:
SUBROUTINE ParPatternFree( InComm, Pattern )
!
! !USES:
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )                 :: InComm  ! Communicator (unused;
                                                    ! kept for interface
                                                    ! compatibility)
! !INPUT/OUTPUT PARAMETERS:
   TYPE(ParPatternType), INTENT( INOUT ) :: Pattern ! Comm Pattern
!
! !DESCRIPTION:
!  This routine frees a communication pattern: every committed MPI
!  derived datatype referenced by the send and receive descriptors is
!  released, then the descriptor index arrays and the descriptor
!  arrays themselves are deallocated.  On return Pattern holds no
!  allocated resources.
!
! !SYSTEM ROUTINES:
!  MPI_TYPE_FREE
!
! !BUGS:
!  The MPI_TYPE_FREE statement does not seem to work with FFC
!
! !REVISION HISTORY:
!  01.02.10   Sawyer     Creation
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER  ipe, Pointer, Ierror
   CPP_ENTER_PROCEDURE( "PARPATTERNFREE" )
!
! Free all the MPI derived types in both directions.  Entries that were
! never committed carry MPI_DATATYPE_NULL and must be skipped, since
! freeing the null datatype is an MPI error.
!
   DO ipe=1, Pattern%Size
      Pointer = Pattern%SendDesc(ipe)%type
      IF ( Pointer /= MPI_DATATYPE_NULL ) THEN
         CALL MPI_TYPE_FREE( Pointer, Ierror )
      ENDIF
      Pointer = Pattern%RecvDesc(ipe)%type
      IF ( Pointer /= MPI_DATATYPE_NULL ) THEN
         CALL MPI_TYPE_FREE( Pointer, Ierror )
      ENDIF
   ENDDO
!
! Release the per-PE index arrays, then the descriptor arrays.
!
   DO ipe=1, size(Pattern%RecvDesc)
      DEALLOCATE( Pattern%RecvDesc(ipe)%Displacements )
      DEALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes )
   ENDDO
   DO ipe=1, size(Pattern%SendDesc)
      DEALLOCATE( Pattern%SendDesc(ipe)%Displacements )
      DEALLOCATE( Pattern%SendDesc(ipe)%BlockSizes )
   ENDDO
   DEALLOCATE( Pattern%SendDesc )
   DEALLOCATE( Pattern%RecvDesc )
   CPP_LEAVE_PROCEDURE( "PARPATTERNFREE" )
   RETURN
!EOC
END SUBROUTINE ParPatternFree
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParScatterReal --- Scatter slice to all PEs
!
! !INTERFACE:
SUBROUTINE ParScatterReal ( InComm, Root, Slice, Decomp, Local )
! !USES:
   USE decompmodule, ONLY : DecompType, Lists
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm    ! Communicator
   INTEGER, INTENT( IN )          :: Root      ! Root PE
   REAL(CPP_REAL8), INTENT( IN )  :: Slice(*)  ! Global Slice
   TYPE(DecompType), INTENT( IN ) :: Decomp    ! Decomp information
! !OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( OUT ) :: Local(*)  ! Local Slice
! !DESCRIPTION:
!  Distribute a one-dimensional global slice from the root PE to all
!  PEs of the communicator, each PE receiving the portion assigned to
!  it by the decomposition Decomp.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!  97.04.14   Sawyer     Creation
!  97.04.16   Sawyer     Cleaned up for walk-through
!  97.05.01   Sawyer     Use Decomp%Comm for all local info
!  97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!  97.05.29   Sawyer     Changed 2-D arrays to 1-D
!  97.07.03   Sawyer     Reformulated documentation
!  97.07.22   Sawyer     DecompType has moved to DecompModule
!  97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!  97.12.05   Sawyer     Added InComm and Root as arguments
!  97.12.05   Sawyer     Added logic to support intercommunicators
!  98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!  98.02.05   Sawyer     Removed the use of intercommunicators
!  98.03.30   Sawyer     Stats dimension corrected: Gsize*MPI_STATUS_SIZE
!  99.01.19   Sawyer     Dropped assumed-size arrays
!  00.07.07   Sawyer     Removed "1D" references
!  00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: Ierror, ipe, seg, tag, cursor, Iam, GroupSize
   INTEGER :: Status( MPI_STATUS_SIZE )
   INTEGER, ALLOCATABLE :: Reqs(:), Stats(:)
   REAL(CPP_REAL8), ALLOCATABLE :: SendBuf(:)
!
   CPP_ENTER_PROCEDURE( "PARSCATTERREAL" )
!
   CALL MPI_COMM_RANK( InComm, Iam, Ierror )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
   ALLOCATE( Reqs( GroupSize ) )
   ALLOCATE( Stats( GroupSize*MPI_STATUS_SIZE ) )
   IF ( Iam .EQ. Root ) THEN
      ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
      cursor = 0
      DO ipe = 1, GroupSize
!
! Pack the array sections destined for PE ipe-1 into a contiguous
! buffer segment.  This is the inverse of the mapping in ParGather.
!
         DO seg = 1, SIZE( Decomp%HEAD(ipe)%StartTags )
            DO tag = Decomp%HEAD(ipe)%StartTags(seg), Decomp%HEAD(ipe)%EndTags(seg)
               cursor = cursor + 1
               SendBuf(cursor) = Slice(tag)
            ENDDO
         ENDDO
!
! Non-blocking send: SendBuf must remain alive until the WAITALL below.
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
         CALL MPI_ISEND( SendBuf(cursor-Decomp%NumEntries(ipe)+1),       &
                         Decomp%NumEntries(ipe), CPP_MPI_REAL8,          &
                         ipe-1, 0, InComm, Reqs(ipe), Ierror )
      ENDDO
   ENDIF
!
! Every PE (the root included) receives its portion from the root.
! The local array may be larger than the decomposition specifies.
!
   CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1),                       &
                  CPP_MPI_REAL8,                                         &
                  Root, 0, InComm, Status, Ierror )
!
! Complete every non-blocking send before releasing the pack buffer,
! EVEN THOUGH THE MPI_RECV HAS COMPLETED.
!
   IF ( Iam .EQ. Root ) THEN
      CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
      DEALLOCATE( SendBuf )
   ENDIF
!
! The following may be needed on some platforms to avoid an MPI bug.
!
   CALL MPI_BARRIER( InComm, Ierror )
   DEALLOCATE( Stats )
   DEALLOCATE( Reqs )
   CPP_LEAVE_PROCEDURE( "PARSCATTERREAL" )
   RETURN
!EOC
END SUBROUTINE ParScatterReal
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParScatterReal4 --- Scatter slice to all PEs
!
! !INTERFACE:
SUBROUTINE ParScatterReal4 ( InComm, Root, Slice, Decomp, Local )
! !USES:
   USE decompmodule, ONLY : DecompType, Lists
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm    ! Communicator
   INTEGER, INTENT( IN )          :: Root      ! Root PE
   REAL(CPP_REAL4), INTENT( IN )  :: Slice(*)  ! Global Slice
   TYPE(DecompType), INTENT( IN ) :: Decomp    ! Decomp information
! !OUTPUT PARAMETERS:
   REAL(CPP_REAL4), INTENT( OUT ) :: Local(*)  ! Local Slice
! !DESCRIPTION:
!  Distribute a one-dimensional REAL4 global slice from the root PE to
!  all PEs of the communicator, each PE receiving the portion assigned
!  to it by the decomposition Decomp.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!  97.04.14   Sawyer     Creation
!  97.04.16   Sawyer     Cleaned up for walk-through
!  97.05.01   Sawyer     Use Decomp%Comm for all local info
!  97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!  97.05.29   Sawyer     Changed 2-D arrays to 1-D
!  97.07.03   Sawyer     Reformulated documentation
!  97.07.22   Sawyer     DecompType has moved to DecompModule
!  97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!  97.12.05   Sawyer     Added InComm and Root as arguments
!  97.12.05   Sawyer     Added logic to support intercommunicators
!  98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!  98.02.05   Sawyer     Removed the use of intercommunicators
!  98.03.30   Sawyer     Stats dimension corrected: Gsize*MPI_STATUS_SIZE
!  99.01.19   Sawyer     Dropped assumed-size arrays
!  00.07.07   Sawyer     Removed "1D" references
!  00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: Ierror, ipe, seg, tag, cursor, Iam, GroupSize
   INTEGER :: Status( MPI_STATUS_SIZE )
   INTEGER, ALLOCATABLE :: Reqs(:), Stats(:)
   REAL(CPP_REAL4), ALLOCATABLE :: SendBuf(:)
!
   CPP_ENTER_PROCEDURE( "PARSCATTERREAL4" )
!
   CALL MPI_COMM_RANK( InComm, Iam, Ierror )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
   ALLOCATE( Reqs( GroupSize ) )
   ALLOCATE( Stats( GroupSize*MPI_STATUS_SIZE ) )
   IF ( Iam .EQ. Root ) THEN
      ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
      cursor = 0
      DO ipe = 1, GroupSize
!
! Pack the array sections destined for PE ipe-1 into a contiguous
! buffer segment.  This is the inverse of the mapping in ParGather.
!
         DO seg = 1, SIZE( Decomp%HEAD(ipe)%StartTags )
            DO tag = Decomp%HEAD(ipe)%StartTags(seg), Decomp%HEAD(ipe)%EndTags(seg)
               cursor = cursor + 1
               SendBuf(cursor) = Slice(tag)
            ENDDO
         ENDDO
!
! Non-blocking send: SendBuf must remain alive until the WAITALL below.
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
         CALL MPI_ISEND( SendBuf(cursor-Decomp%NumEntries(ipe)+1),       &
                         Decomp%NumEntries(ipe), CPP_MPI_REAL4,          &
                         ipe-1, 0, InComm, Reqs(ipe), Ierror )
      ENDDO
   ENDIF
!
! Every PE (the root included) receives its portion from the root.
! The local array may be larger than the decomposition specifies.
!
   CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1),                       &
                  CPP_MPI_REAL4,                                         &
                  Root, 0, InComm, Status, Ierror )
!
! Complete every non-blocking send before releasing the pack buffer,
! EVEN THOUGH THE MPI_RECV HAS COMPLETED.
!
   IF ( Iam .EQ. Root ) THEN
      CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
      DEALLOCATE( SendBuf )
   ENDIF
!
! The following may be needed on some platforms to avoid an MPI bug.
!
   CALL MPI_BARRIER( InComm, Ierror )
   DEALLOCATE( Stats )
   DEALLOCATE( Reqs )
   CPP_LEAVE_PROCEDURE( "PARSCATTERREAL4" )
   RETURN
!EOC
END SUBROUTINE ParScatterReal4
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParScatterInt --- Scatter slice to all PEs
!
! !INTERFACE:
SUBROUTINE ParScatterInt ( InComm, Root, Slice, Decomp, Local )
! !USES:
   USE decompmodule, ONLY : DecompType, Lists
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm    ! Communicator
   INTEGER, INTENT( IN )          :: Root      ! Root PE
   INTEGER, INTENT( IN )          :: Slice(*)  ! Global Slice
   TYPE(DecompType), INTENT( IN ) :: Decomp    ! Decomp information
! !OUTPUT PARAMETERS:
   INTEGER, INTENT( OUT )         :: Local(*)  ! Local Slice
! !DESCRIPTION:
!  Distribute a one-dimensional INTEGER global slice from the root PE
!  to all PEs of the communicator, each PE receiving the portion
!  assigned to it by the decomposition Decomp.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!  97.04.14   Sawyer     Creation
!  97.04.16   Sawyer     Cleaned up for walk-through
!  97.05.01   Sawyer     Use Decomp%Comm for all local info
!  97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!  97.05.29   Sawyer     Changed 2-D arrays to 1-D
!  97.07.03   Sawyer     Reformulated documentation
!  97.07.22   Sawyer     DecompType has moved to DecompModule
!  97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!  97.12.05   Sawyer     Added InComm and Root as arguments
!  97.12.05   Sawyer     Added logic to support intercommunicators
!  98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!  98.02.05   Sawyer     Removed the use of intercommunicators
!  98.03.30   Sawyer     Stats dimension corrected: Gsize*MPI_STATUS_SIZE
!  99.01.19   Sawyer     Dropped assumed-size arrays
!  00.07.07   Sawyer     Removed "1D" references
!  00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: Ierror, ipe, seg, tag, cursor, Iam, GroupSize
   INTEGER :: Status( MPI_STATUS_SIZE )
   INTEGER, ALLOCATABLE :: Reqs(:), Stats(:)
   INTEGER, ALLOCATABLE :: SendBuf(:)
!
   CPP_ENTER_PROCEDURE( "PARSCATTERINT" )
!
   CALL MPI_COMM_RANK( InComm, Iam, Ierror )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
   ALLOCATE( Reqs( GroupSize ) )
   ALLOCATE( Stats( GroupSize*MPI_STATUS_SIZE ) )
   IF ( Iam .EQ. Root ) THEN
      ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
      cursor = 0
      DO ipe = 1, GroupSize
!
! Pack the array sections destined for PE ipe-1 into a contiguous
! buffer segment.  This is the inverse of the mapping in ParGather.
!
         DO seg = 1, SIZE( Decomp%HEAD(ipe)%StartTags )
            DO tag = Decomp%HEAD(ipe)%StartTags(seg), Decomp%HEAD(ipe)%EndTags(seg)
               cursor = cursor + 1
               SendBuf(cursor) = Slice(tag)
            ENDDO
         ENDDO
!
! Non-blocking send: SendBuf must remain alive until the WAITALL below.
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
         CALL MPI_ISEND( SendBuf(cursor-Decomp%NumEntries(ipe)+1),       &
                         Decomp%NumEntries(ipe), CPP_MPI_INTEGER,        &
                         ipe-1, 0, InComm, Reqs(ipe), Ierror )
      ENDDO
   ENDIF
!
! Every PE (the root included) receives its portion from the root.
! The local array may be larger than the decomposition specifies.
!
   CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1),                       &
                  CPP_MPI_INTEGER,                                       &
                  Root, 0, InComm, Status, Ierror )
!
! Complete every non-blocking send before releasing the pack buffer,
! EVEN THOUGH THE MPI_RECV HAS COMPLETED.
!
   IF ( Iam .EQ. Root ) THEN
      CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
      DEALLOCATE( SendBuf )
   ENDIF
!
! The following may be needed on some platforms to avoid an MPI bug.
!
   CALL MPI_BARRIER( InComm, Ierror )
   DEALLOCATE( Stats )
   DEALLOCATE( Reqs )
   CPP_LEAVE_PROCEDURE( "PARSCATTERINT" )
   RETURN
!EOC
END SUBROUTINE ParScatterInt
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParGatherReal --- Gather Slice from all PEs
!
! !INTERFACE:
SUBROUTINE ParGatherReal ( InComm, Root, Local, Decomp, Slice )
! !USES:
   USE decompmodule, ONLY : DecompType, Lists
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm    ! Communicator
   INTEGER, INTENT( IN )          :: Root      ! Root PE
   REAL(CPP_REAL8), INTENT( IN )  :: Local(*)  ! Local Slice
   TYPE(DecompType), INTENT( IN ) :: Decomp    ! Decomp information
! !OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( OUT ) :: Slice(*)  ! Global Slice
! !DESCRIPTION:
!  Collect the local portions held by every PE, as described by the
!  decomposition Decomp, into a single global slice on the root PE.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!  97.04.14   Sawyer     Creation
!  97.04.16   Sawyer     Cleaned up for walk-through
!  97.05.01   Sawyer     Use Decomp%Comm for all local info
!  97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!  97.05.29   Sawyer     Changed 2-D arrays to 1-D
!  97.07.03   Sawyer     Reformulated documentation
!  97.07.22   Sawyer     DecompType has moved to DecompModule
!  97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!  97.12.05   Sawyer     Added InComm and Root as arguments
!  97.12.05   Sawyer     Added logic to support intercommunicators
!  98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!  98.01.29   Sawyer     Corrected assertions
!  98.02.05   Sawyer     Removed the use of intercommunicators
!  98.03.31   Sawyer     Stat dimension corrected: MPI_STATUS_SIZE
!  98.04.22   Sawyer     Local no longer assumed shape: Local(*)
!  99.01.19   Sawyer     Dropped assumed-size arrays
!  00.07.07   Sawyer     Removed "1D" references
!  00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: Ierror, ipe, seg, tag, cursor, Iam, GroupSize, Req
   INTEGER :: Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
   REAL(CPP_REAL8), ALLOCATABLE :: RecvBuf(:)
!
   CPP_ENTER_PROCEDURE( "PARGATHERREAL" )
!
   CALL MPI_COMM_RANK( InComm, Iam, Ierror )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
!
! Every PE posts a non-blocking send of its contribution to the root.
!
   CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1),                      &
                   CPP_MPI_REAL8,                                        &
                   Root, Iam+3001, InComm, Req, Ierror )
   IF ( Iam .EQ. Root ) THEN
      ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
!
! The root receives from every PE in rank order.  The blocking
! receives cannot deadlock because all sends are already posted.
!
      cursor = 0
      DO ipe = 1, GroupSize
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
         CPP_ASSERT_F90( cursor .LT. SIZE( RecvBuf ) )
         CALL MPI_RECV( RecvBuf(cursor+1), Decomp%NumEntries(ipe),       &
                        CPP_MPI_REAL8, ipe-1, ipe+3000, InComm,          &
                        Status, Ierror )
!
! Unpack into the global slice: the simple inverse of ParScatter.
!
         DO seg = 1, SIZE( Decomp%HEAD(ipe)%StartTags )
            DO tag = Decomp%HEAD(ipe)%StartTags(seg), Decomp%HEAD(ipe)%EndTags(seg)
               cursor = cursor + 1
               Slice(tag) = RecvBuf(cursor)
#if defined(DEBUG_PARGATHERREAL)
               PRINT *, " Entry ", cursor, RecvBuf(cursor), tag, SIZE(Slice)
#endif
            ENDDO
         ENDDO
      ENDDO
      DEALLOCATE( RecvBuf )
   ENDIF
   CALL MPI_WAIT( Req, Stat, Ierror )
!
! The following may be needed on some platforms to avoid an MPI bug.
!
   CALL MPI_BARRIER( InComm, Ierror )
   CPP_LEAVE_PROCEDURE( "PARGATHERREAL" )
   RETURN
!EOC
END SUBROUTINE ParGatherReal
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParGatherReal4 --- Gather Slice from all PEs
!
! !INTERFACE:
SUBROUTINE ParGatherReal4 ( InComm, Root, Local, Decomp, Slice )
! !USES:
   USE decompmodule, ONLY : DecompType, Lists
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm    ! Communicator
   INTEGER, INTENT( IN )          :: Root      ! Root PE
   REAL(CPP_REAL4), INTENT( IN )  :: Local(*)  ! Local Slice
   TYPE(DecompType), INTENT( IN ) :: Decomp    ! Decomp information
! !OUTPUT PARAMETERS:
   REAL(CPP_REAL4), INTENT( OUT ) :: Slice(*)  ! Global Slice
! !DESCRIPTION:
!  Collect the local REAL4 portions held by every PE, as described by
!  the decomposition Decomp, into a single global slice on the root PE.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!  97.04.14   Sawyer     Creation
!  97.04.16   Sawyer     Cleaned up for walk-through
!  97.05.01   Sawyer     Use Decomp%Comm for all local info
!  97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!  97.05.29   Sawyer     Changed 2-D arrays to 1-D
!  97.07.03   Sawyer     Reformulated documentation
!  97.07.22   Sawyer     DecompType has moved to DecompModule
!  97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!  97.12.05   Sawyer     Added InComm and Root as arguments
!  97.12.05   Sawyer     Added logic to support intercommunicators
!  98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!  98.01.29   Sawyer     Corrected assertions
!  98.02.05   Sawyer     Removed the use of intercommunicators
!  98.03.31   Sawyer     Stat dimension corrected: MPI_STATUS_SIZE
!  98.04.22   Sawyer     Local no longer assumed shape: Local(*)
!  99.01.19   Sawyer     Dropped assumed-size arrays
!  00.07.07   Sawyer     Removed "1D" references
!  00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: Ierror, ipe, seg, tag, cursor, Iam, GroupSize, Req
   INTEGER :: Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
   REAL(CPP_REAL4), ALLOCATABLE :: RecvBuf(:)
!
   CPP_ENTER_PROCEDURE( "PARGATHERREAL4" )
!
   CALL MPI_COMM_RANK( InComm, Iam, Ierror )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
!
! Every PE posts a non-blocking send of its contribution to the root.
!
   CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1),                      &
                   CPP_MPI_REAL4,                                        &
                   Root, Iam+3001, InComm, Req, Ierror )
   IF ( Iam .EQ. Root ) THEN
      ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
!
! The root receives from every PE in rank order.  The blocking
! receives cannot deadlock because all sends are already posted.
!
      cursor = 0
      DO ipe = 1, GroupSize
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
         CPP_ASSERT_F90( cursor .LT. SIZE( RecvBuf ) )
         CALL MPI_RECV( RecvBuf(cursor+1), Decomp%NumEntries(ipe),       &
                        CPP_MPI_REAL4, ipe-1, ipe+3000, InComm,          &
                        Status, Ierror )
!
! Unpack into the global slice: the simple inverse of ParScatter.
!
         DO seg = 1, SIZE( Decomp%HEAD(ipe)%StartTags )
            DO tag = Decomp%HEAD(ipe)%StartTags(seg), Decomp%HEAD(ipe)%EndTags(seg)
               cursor = cursor + 1
               Slice(tag) = RecvBuf(cursor)
#if defined(DEBUG_PARGATHERREAL4)
               PRINT *, " Entry ", cursor, RecvBuf(cursor), tag, SIZE(Slice)
#endif
            ENDDO
         ENDDO
      ENDDO
      DEALLOCATE( RecvBuf )
   ENDIF
   CALL MPI_WAIT( Req, Stat, Ierror )
!
! The following may be needed on some platforms to avoid an MPI bug.
!
   CALL MPI_BARRIER( InComm, Ierror )
   CPP_LEAVE_PROCEDURE( "PARGATHERREAL4" )
   RETURN
!EOC
END SUBROUTINE ParGatherReal4
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParGatherInt --- Gather Slice from all PEs
!
! !INTERFACE:
SUBROUTINE ParGatherInt ( InComm, Root, Local, Decomp, Slice )
! !USES:
   USE decompmodule, ONLY : DecompType, Lists
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm    ! Communicator
   INTEGER, INTENT( IN )          :: Root      ! Root PE
   INTEGER, INTENT( IN )          :: Local(*)  ! Local Slice
   TYPE(DecompType), INTENT( IN ) :: Decomp    ! Decomp information
! !OUTPUT PARAMETERS:
   INTEGER, INTENT( OUT )         :: Slice(*)  ! Global Slice
! !DESCRIPTION:
!  Collect the local INTEGER portions held by every PE, as described by
!  the decomposition Decomp, into a single global slice on the root PE.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_COMM_RANK
!
! !REVISION HISTORY:
!  97.04.14   Sawyer     Creation
!  97.04.16   Sawyer     Cleaned up for walk-through
!  97.05.01   Sawyer     Use Decomp%Comm for all local info
!  97.05.18   Sawyer     DecompType has moved to ParUtilitiesTypes
!  97.05.29   Sawyer     Changed 2-D arrays to 1-D
!  97.07.03   Sawyer     Reformulated documentation
!  97.07.22   Sawyer     DecompType has moved to DecompModule
!  97.12.01   Sawyer     Changed MPI_SSEND to MPI_ISEND
!  97.12.05   Sawyer     Added InComm and Root as arguments
!  97.12.05   Sawyer     Added logic to support intercommunicators
!  98.01.24   Sawyer     Removed dependence on MPI derived types TESTED
!  98.01.29   Sawyer     Corrected assertions
!  98.02.05   Sawyer     Removed the use of intercommunicators
!  98.03.31   Sawyer     Stat dimension corrected: MPI_STATUS_SIZE
!  98.04.22   Sawyer     Local no longer assumed shape: Local(*)
!  99.01.19   Sawyer     Dropped assumed-size arrays
!  00.07.07   Sawyer     Removed "1D" references
!  00.07.23   Sawyer     Implementation with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: Ierror, ipe, seg, tag, cursor, Iam, GroupSize, Req
   INTEGER :: Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
   INTEGER, ALLOCATABLE :: RecvBuf(:)
!
   CPP_ENTER_PROCEDURE( "PARGATHERINT" )
!
   CALL MPI_COMM_RANK( InComm, Iam, Ierror )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
!
! Every PE posts a non-blocking send of its contribution to the root.
!
   CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1), CPP_MPI_INTEGER,     &
                   Root, Iam+3001, InComm, Req, Ierror )
   IF ( Iam .EQ. Root ) THEN
      ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
!
! The root receives from every PE in rank order.  The blocking
! receives cannot deadlock because all sends are already posted.
!
      cursor = 0
      DO ipe = 1, GroupSize
!
! WARNING: F90-MPI inconsistency: make sure the indexing below always works
!
         CPP_ASSERT_F90( cursor .LT. SIZE( RecvBuf ) )
         CALL MPI_RECV( RecvBuf(cursor+1), Decomp%NumEntries(ipe),       &
                        CPP_MPI_INTEGER, ipe-1, ipe+3000, InComm,        &
                        Status, Ierror )
!
! Unpack into the global slice: the simple inverse of ParScatter.
!
         DO seg = 1, SIZE( Decomp%HEAD(ipe)%StartTags )
            DO tag = Decomp%HEAD(ipe)%StartTags(seg), Decomp%HEAD(ipe)%EndTags(seg)
               cursor = cursor + 1
               Slice(tag) = RecvBuf(cursor)
#if defined(DEBUG_PARGATHERINT)
               PRINT *, " Entry ", cursor, RecvBuf(cursor), tag, SIZE(Slice)
#endif
            ENDDO
         ENDDO
      ENDDO
      DEALLOCATE( RecvBuf )
   ENDIF
   CALL MPI_WAIT( Req, Stat, Ierror )
!
! The following may be needed on some platforms to avoid an MPI bug.
!
   CALL MPI_BARRIER( InComm, Ierror )
   CPP_LEAVE_PROCEDURE( "PARGATHERINT" )
   RETURN
!EOC
END SUBROUTINE ParGatherInt
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParBeginTransferReal --- Start an ASYNC Real Transfer
!
! !INTERFACE:
SUBROUTINE ParBeginTransferReal(InComm, NrInPackets, NrOutPackets,       &
                                Dest, Src, InBuf, InIA,                  &
                                OutBuf, OutIA )
! !USES:
   IMPLICIT NONE
! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )       :: InComm       ! Communicator
   INTEGER, INTENT( IN )       :: NrInPackets  ! Number of in packets
   INTEGER, INTENT( IN )       :: NrOutPackets ! Number of out packets
   INTEGER, INTENT( IN )       :: Dest(:)      ! PE destinations
   INTEGER, INTENT( IN )       :: Src(:)       ! PE sources
   REAL(CPP_REAL8), INTENT(IN) :: InBuf(:)     ! Input buffer
   INTEGER, INTENT( IN )       :: InIA(:)      ! In packet counter
   INTEGER, INTENT( IN )       :: OutIA(:)     ! Out packet counter
! !OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( OUT ) :: OutBuf(:) ! Output buffer
! !DESCRIPTION:
!
!  Initiate an asynchronous transfer of InBuf, partitioned into
!  parcels by InIA and Dest, to the output array OutBuf on other PEs.
!  InIA(1) is the number of reals sent to Dest(1), InIA(2) the number
!  sent to Dest(2), and so on; likewise OutBuf is partitioned by OutIA
!  and Src, OutIA(1) being the number of reals expected from Src(1).
!
!  All receives are posted with MPI\_IRECV before the parcels are
!  dispatched with MPI\_ISEND; the corresponding wait handles live in
!  the module-level arrays InHandle/OutHandle, indexed by the current
!  transfer slot BegTrf.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference!!!!!!!!!!
!
!  The buffers may not be accessed until after the call to
!  ParEndTransferReal.
!
!
! !SYSTEM ROUTINES:
!  MPI_COMM_RANK, MPI_ISEND, MPI_IRECV
!
! !REVISION HISTORY:
!  97.09.26   Sawyer     Creation
!  97.12.05   Sawyer     Renamed Comm to InComm to avoid collisions
!  98.02.26   Sawyer     Added Dest, Src and Remote to clean up code
!  98.04.16   Sawyer     Number of packets become input arguments
!  98.09.04   Sawyer     Cleaned interface: handles in common, no Remote
!  99.03.04   Sawyer     Inlined ParCalculateRemote
!  99.06.01   Sawyer     Changed pointer arrays to INTEGER*8 for SGI
!  00.08.07   Sawyer     Implementation with shared memory arenas
!  01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: Iam, GroupSize, npts, offset, pkt, Ierr
   CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERREAL" )
   CPP_ASSERT_F90( NrInPackets .LE. SIZE( Dest ) )
   CPP_ASSERT_F90( NrInPackets .LE. SIZE( InIA ) )
   CPP_ASSERT_F90( NrOutPackets .LE. SIZE( Src ) )
   CPP_ASSERT_F90( NrOutPackets .LE. SIZE( OutIA ) )
!
! Advance to the next transfer slot (cyclic over MAX_TRF).
   BegTrf = MOD(BegTrf,MAX_TRF) + 1
   CALL MPI_COMM_RANK( InComm, Iam, Ierr )
   CALL MPI_COMM_SIZE( InComm, GroupSize, Ierr )
!
! Post all the non-blocking receives first.
!
   offset = 1
   DO pkt = 1, NrOutPackets
      npts = OutIA( pkt )
      IF ( npts .GT. 0 ) THEN
#if defined( DEBUG_PARBEGINTRANSFERREAL )
         PRINT *, "Iam ",Iam," posts recv ",npts," from ", Src( pkt )
#endif
         CPP_ASSERT_F90( offset+npts-1 .LE. SIZE( OutBuf ) )
         CALL MPI_IRECV( OutBuf( offset ), npts,                         &
                         CPP_MPI_REAL8, Src( pkt ), Src( pkt ),          &
                         InComm, OutHandle(pkt,1,BegTrf), Ierr )
      ELSE
! Zero-length parcel: record a null request so the matching wait is a no-op.
         OutHandle(pkt,1,BegTrf) = MPI_REQUEST_NULL
      END IF
      offset = offset + npts
   END DO
!
! Now dispatch the parcels with non-blocking sends.
!
   offset = 1
   CPP_ASSERT_F90( NrInPackets .LE. SIZE( Dest ) )
   CPP_ASSERT_F90( NrInPackets .LE. SIZE( InIA ) )
   DO pkt = 1, NrInPackets
      npts = InIA( pkt )
      IF ( npts .GT. 0 ) THEN
#if defined( DEBUG_PARBEGINTRANSFERREAL )
         PRINT *,"Iam ",Iam," posts send ",npts," to ",Dest( pkt )
#endif
         CPP_ASSERT_F90( offset+npts-1 .LE. SIZE( InBuf ) )
         CALL MPI_ISEND ( InBuf( offset ), npts,                         &
                          CPP_MPI_REAL8, Dest( pkt ), Iam,               &
                          InComm, InHandle(pkt,1,BegTrf), Ierr )
      ELSE
! Zero-length parcel: record a null request so the matching wait is a no-op.
         InHandle(pkt,1,BegTrf) = MPI_REQUEST_NULL
      END IF
      offset = offset + npts
   END DO
!
!
   CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERREAL" )
   RETURN
!EOC
END SUBROUTINE ParBeginTransferReal
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParBeginTransferPattern1D --- Start ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParBeginTransferPattern1D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_sendirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm    ! Communicator
   TYPE (ParPatternType), INTENT( IN ) :: Pattern   ! Comm Pattern
   REAL(CPP_REAL8), INTENT( IN )       :: InBuf(*)  ! Input buffer

! !OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( OUT )      :: OutBuf(*) ! Output buffer

! !DESCRIPTION:
!
!  This routine initiates an asynchronous transfer of the array InBuf.
!  The communication pattern indicates the outgoing indices of InBuf
!  and the incoming indices of OutBuf.  It is fundamentally equivalent
!  to ParBeginTransferReal; the communication pattern is largely a
!  performance enhancement, since it eliminates intermediate buffering.
!
!  The wait handles are held inside mod_comm; neither buffer may be
!  accessed until the matching ParEndTransferPattern1D call completes.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference.
!
! !REVISION HISTORY:
!    01.02.14   Sawyer     Creation from ParBeginTransferReal
!    01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_sendirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN1D" )

   ! All send/receive bookkeeping is delegated to mod_comm
   CALL mp_sendirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN1D" )
   RETURN
!EOC
END SUBROUTINE ParBeginTransferPattern1D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParBeginTransferPattern1Dint --- Start ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParBeginTransferPattern1Dint( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_sendirr_i4
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm    ! Communicator
   TYPE (ParPatternType), INTENT( IN ) :: Pattern   ! Comm Pattern
   INTEGER, INTENT( IN )               :: InBuf(*)  ! Input buffer

! !OUTPUT PARAMETERS:
   INTEGER, INTENT( OUT )              :: OutBuf(*) ! Output buffer

! !DESCRIPTION:
!
!  This routine initiates an asynchronous transfer of the integer
!  array InBuf.  The communication pattern indicates the outgoing
!  indices of InBuf and the incoming indices of OutBuf.  It is the
!  INTEGER*4 counterpart of ParBeginTransferPattern1D.
!
!  The wait handles are held inside mod_comm; neither buffer may be
!  accessed until the matching ParEndTransferPattern1Dint call
!  completes.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference.
!
! !REVISION HISTORY:
!    01.02.14   Sawyer     Creation from ParBeginTransferReal
!    01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_sendirr_i4
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN1DINT" )

   ! All send/receive bookkeeping is delegated to mod_comm
   CALL mp_sendirr_i4( InComm, Pattern%SendDesc, Pattern%RecvDesc,     &
                       InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN1DINT" )
   RETURN
!EOC
END SUBROUTINE ParBeginTransferPattern1Dint
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParBeginTransferPattern2D --- Start an ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParBeginTransferPattern2D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_sendirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm       ! Communicator
   TYPE (ParPatternType), INTENT(IN)   :: Pattern      ! Comm Pattern
   REAL(CPP_REAL8), INTENT(IN)         :: InBuf(:,:)   ! Input buffer

! !OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT(OUT)        :: OutBuf(:,:)  ! Output buffer

! !DESCRIPTION:
!
!  This routine initiates an asynchronous transfer of the rank-2 array
!  InBuf.  The communication pattern indicates the outgoing indices of
!  InBuf and the incoming indices of OutBuf.  It is fundamentally
!  equivalent to ParBeginTransferReal; the communication pattern is
!  largely a performance enhancement, since it eliminates intermediate
!  buffering.
!
!  The wait handles are held inside mod_comm; neither buffer may be
!  accessed until the matching ParEndTransferPattern2D call completes.
!
! !REVISION HISTORY:
!    01.10.01   Sawyer     Creation from ParBeginTransferPattern
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_sendirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN2D" )

   ! All send/receive bookkeeping is delegated to mod_comm
   CALL mp_sendirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN2D" )
   RETURN
!EOC
END SUBROUTINE ParBeginTransferPattern2D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParBeginTransferPattern3D --- Start an ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParBeginTransferPattern3D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_sendirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm         ! Communicator
   TYPE (ParPatternType), INTENT(IN)   :: Pattern        ! Comm Pattern
   REAL(CPP_REAL8), INTENT(IN)         :: InBuf(:,:,:)   ! Input buffer

! !OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT(OUT)        :: OutBuf(:,:,:)  ! Output buffer

! !DESCRIPTION:
!
!  This routine initiates an asynchronous transfer of the rank-3 array
!  InBuf.  The communication pattern indicates the outgoing indices of
!  InBuf and the incoming indices of OutBuf.  It is fundamentally
!  equivalent to ParBeginTransferReal; the communication pattern is
!  largely a performance enhancement, since it eliminates intermediate
!  buffering.
!
!  The wait handles are held inside mod_comm; neither buffer may be
!  accessed until the matching ParEndTransferPattern3D call completes.
!
! !REVISION HISTORY:
!    01.10.01   Sawyer     Creation from ParBeginTransferPattern
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_sendirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN3D" )

   ! All send/receive bookkeeping is delegated to mod_comm
   CALL mp_sendirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN3D" )
   RETURN
!EOC
END SUBROUTINE ParBeginTransferPattern3D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParBeginTransferPattern4D --- Start an ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParBeginTransferPattern4D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_sendirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm           ! Communicator
   TYPE (ParPatternType), INTENT(IN)   :: Pattern          ! Comm Pattern
   REAL(CPP_REAL8), INTENT(IN)         :: InBuf(:,:,:,:)   ! Input buffer

! !OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT(OUT)        :: OutBuf(:,:,:,:)  ! Output buffer

! !DESCRIPTION:
!
!  This routine initiates an asynchronous transfer of the rank-4 array
!  InBuf.  The communication pattern indicates the outgoing indices of
!  InBuf and the incoming indices of OutBuf.  It is fundamentally
!  equivalent to ParBeginTransferReal; the communication pattern is
!  largely a performance enhancement, since it eliminates intermediate
!  buffering.
!
!  The wait handles are held inside mod_comm; neither buffer may be
!  accessed until the matching ParEndTransferPattern4D call completes.
!
! !REVISION HISTORY:
!    02.12.19   Sawyer     Creation from ParBeginTransferPattern
!    03.06.24   Sawyer     All complexity now in mp_sendirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN4D" )

   ! All send/receive bookkeeping is delegated to mod_comm
   CALL mp_sendirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN4D" )
   RETURN
!EOC
END SUBROUTINE ParBeginTransferPattern4D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParEndTransferReal --- Complete an ASYNC Real Transfer
!
! !INTERFACE:
SUBROUTINE ParEndTransferReal( InComm, NrInPackets, NrOutPackets,      &
                               Dest, Src, InBuf, InIA,                 &
                               OutBuf, OutIA )
!
! !USES:
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )         :: InComm        ! Communicator
   INTEGER, INTENT( IN )         :: NrInPackets   ! Number of in packets
   INTEGER, INTENT( IN )         :: NrOutPackets  ! Number of out packets
   INTEGER, INTENT( IN )         :: Dest(:)       ! PE destinations
   INTEGER, INTENT( IN )         :: Src(:)        ! PE sources
   REAL(CPP_REAL8), INTENT(IN)   :: InBuf(:)      ! Input buffer
   INTEGER, INTENT( IN )         :: InIA(:)       ! Pointer array
   INTEGER, INTENT( IN )         :: OutIA(:)      ! Pointer array

! !INPUT/OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( INOUT ) :: OutBuf(:)  ! Output buffer

! !DESCRIPTION:
!
!  This routine completes an asynchronous transfer of an array
!  partitioned into parcels defined by the array InIA.  In the
!  MPI version, neither InBuf nor OutBuf is used here, since
!  that information was utilized in ParBeginTransferReal.
!
!  The link between StartTransfer and EndTransfer is made possible
!  by the module variables InHandle and OutHandle: they reflect the
!  status of the ongoing transfer.  When this routine completes, a
!  valid and accessible copy of OutBuf is ready for use.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference!  The buffers may not be accessed until after the
!  completion of ParEndTransferReal.
!
! !SYSTEM ROUTINES:
!  MPI_WAITALL, MPI_BARRIER
!
! !REVISION HISTORY:
!    97.09.26   Sawyer     Creation
!    97.12.05   Sawyer     Renamed Comm to InComm to avoid collisions
!    98.02.26   Sawyer     Count through packets, not PEs
!    98.04.16   Sawyer     Number of packets become input arguments
!    98.09.04   Sawyer     Cleaned interface: handles in common
!    99.03.05   Sawyer     Support for contiguous communicators in SHMEM
!    99.04.22   Sawyer     Bug fix: replaced MPI_WAIT with MPI_WAITALL
!    99.06.03   Sawyer     Bug fix: GroupSize in SHMEM_BARRIER
!    00.07.28   Sawyer     Implemented with shared memory arenas
!    01.09.27   Sawyer     Added multiple shared buffers for USE_MLP
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER Ierr
   INTEGER InStats(NrInPackets*MPI_STATUS_SIZE)
   INTEGER OutStats(NrOutPackets*MPI_STATUS_SIZE)

   CPP_ENTER_PROCEDURE( "PARENDTRANSFERREAL" )
!
! Increment the receiver (cyclic buffer of in-flight transfers)
!
   EndTrf = MOD(EndTrf,MAX_TRF)+1

   ! Complete the sends posted in ParBeginTransferReal
   CPP_ASSERT_F90( NrInPackets .LE. MAX_PAX )
   CALL MPI_WAITALL( NrInPackets, InHandle(:,1,EndTrf), InStats, Ierr )

   ! Complete the receives posted in ParBeginTransferReal
   CPP_ASSERT_F90( NrOutPackets .LE. MAX_PAX )
   CALL MPI_WAITALL( NrOutPackets, OutHandle(:,1,EndTrf), OutStats, Ierr )
!
! WS 98.09.22 : This barrier needed to synchronize.
!
   CALL MPI_BARRIER( InComm, Ierr )

   CPP_LEAVE_PROCEDURE( "PARENDTRANSFERREAL" )
   RETURN
!EOC
END SUBROUTINE ParEndTransferReal
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParEndTransferPattern1D --- Complete ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParEndTransferPattern1D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_recvirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm    ! Communicator
   TYPE (ParPatternType), INTENT( IN ) :: Pattern   ! Comm Pattern
   REAL(CPP_REAL8), INTENT( IN )       :: InBuf(*)  ! Input buffer

! !INPUT/OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( INOUT )    :: OutBuf(*) ! Output buffer

! !DESCRIPTION:
!
!  This routine completes an asynchronous transfer of an array
!  communicated with a communication pattern (started by
!  ParBeginTransferPattern1D).  When this routine completes, a valid
!  and accessible copy of OutBuf is ready for use.  The buffers may
!  not be accessed until after the completion of ParEndTransfer.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference.
!
! !REVISION HISTORY:
!    01.02.14   Sawyer     Creation from ParEndTransferReal
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_recvirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN1D" )

   ! Completion of the pattern transfer is delegated to mod_comm
   CALL mp_recvirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN1D" )
   RETURN
!EOC
END SUBROUTINE ParEndTransferPattern1D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParEndTransferPattern1Dint --- Complete ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParEndTransferPattern1Dint( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_recvirr_i4
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm    ! Communicator
   TYPE (ParPatternType), INTENT( IN ) :: Pattern   ! Comm Pattern
   INTEGER, INTENT( IN )               :: InBuf(*)  ! Input buffer

! !INPUT/OUTPUT PARAMETERS:
   INTEGER, INTENT( INOUT )            :: OutBuf(*) ! Output buffer

! !DESCRIPTION:
!
!  This routine completes an asynchronous transfer of an integer
!  array communicated with a communication pattern (started by
!  ParBeginTransferPattern1Dint).  When this routine completes, a
!  valid and accessible copy of OutBuf is ready for use.  The buffers
!  may not be accessed until after the completion of ParEndTransfer.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference.
!
! !REVISION HISTORY:
!    01.02.14   Sawyer     Creation from ParEndTransferReal
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_recvirr_i4
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN1DINT" )

   ! Completion of the pattern transfer is delegated to mod_comm
   CALL mp_recvirr_i4( InComm, Pattern%SendDesc, Pattern%RecvDesc,     &
                       InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN1DINT" )
   RETURN
!EOC
END SUBROUTINE ParEndTransferPattern1Dint
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParEndTransferPattern2D --- Complete an ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParEndTransferPattern2D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_recvirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm      ! Communicator
   TYPE (ParPatternType), INTENT( IN ) :: Pattern     ! Comm Pattern
   REAL(CPP_REAL8), INTENT( IN )       :: InBuf(:,:)  ! Input buffer

! !INPUT/OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( INOUT )    :: OutBuf(:,:) ! Output buffer

! !DESCRIPTION:
!
!  This routine completes an asynchronous transfer of a rank-2 array
!  communicated with a communication pattern (started by
!  ParBeginTransferPattern2D).  When this routine completes, a valid
!  and accessible copy of OutBuf is ready for use.  The buffers may
!  not be accessed until after the completion of ParEndTransfer.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference.
!
! !REVISION HISTORY:
!    01.10.01   Sawyer     Creation from ParEndTransferPattern
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_recvirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN2D" )

   ! Completion of the pattern transfer is delegated to mod_comm
   CALL mp_recvirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf(:,:), OutBuf(:,:) )

   CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN2D" )
   RETURN
!EOC
END SUBROUTINE ParEndTransferPattern2D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParEndTransferPattern3D --- Complete an ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParEndTransferPattern3D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_recvirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm        ! Communicator
   TYPE (ParPatternType), INTENT( IN ) :: Pattern       ! Comm Pattern
   REAL(CPP_REAL8), INTENT( IN )       :: InBuf(:,:,:)  ! Input buffer

! !INPUT/OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( INOUT )    :: OutBuf(:,:,:) ! Output buffer

! !DESCRIPTION:
!
!  This routine completes an asynchronous transfer of a rank-3 array
!  communicated with a communication pattern (started by
!  ParBeginTransferPattern3D).  When this routine completes, a valid
!  and accessible copy of OutBuf is ready for use.  The buffers may
!  not be accessed until after the completion of ParEndTransfer.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference.
!
! !REVISION HISTORY:
!    01.10.01   Sawyer     Creation from ParEndTransferPattern
!    02.08.13   Sawyer     Now uses mod_comm unless Use_Mpi_Types
!    03.06.24   Sawyer     All complexity now in mp_recvirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN3D" )

   ! Completion of the pattern transfer is delegated to mod_comm
   CALL mp_recvirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf(:,:,:), OutBuf(:,:,:) )

   CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN3D" )
   RETURN
!EOC
END SUBROUTINE ParEndTransferPattern3D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParEndTransferPattern4D --- Complete an ASYNC Pattern Transfer
!
! !INTERFACE:
SUBROUTINE ParEndTransferPattern4D( InComm, Pattern, InBuf, OutBuf )
!
! !USES:
   USE mod_comm, ONLY : mp_recvirr
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )               :: InComm          ! Communicator
   TYPE (ParPatternType), INTENT( IN ) :: Pattern         ! Comm Pattern
   REAL(CPP_REAL8), INTENT( IN )       :: InBuf(:,:,:,:)  ! Input buffer

! !INPUT/OUTPUT PARAMETERS:
   REAL(CPP_REAL8), INTENT( INOUT )    :: OutBuf(:,:,:,:) ! Output buffer

! !DESCRIPTION:
!
!  This routine completes an asynchronous transfer of a rank-4 array
!  communicated with a communication pattern (started by
!  ParBeginTransferPattern4D).  When this routine completes, a valid
!  and accessible copy of OutBuf is ready for use.  The buffers may
!  not be accessed until after the completion of ParEndTransfer.
!
! !BUGS:
!
!  It is assumed that the buffers are passed to this routine by
!  reference.
!
! !REVISION HISTORY:
!    02.12.19   Sawyer     Creation from ParEndTransferPattern
!    03.06.24   Sawyer     All complexity now in mp_recvirr
!
!EOP
!-----------------------------------------------------------------------
!BOC
   CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN4D" )

   ! Completion of the pattern transfer is delegated to mod_comm
   CALL mp_recvirr( InComm, Pattern%SendDesc, Pattern%RecvDesc,        &
                    InBuf, OutBuf )

   CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN4D" )
   RETURN
!EOC
END SUBROUTINE ParEndTransferPattern4D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParExchangeVectorReal --- Exchange a sparse packed vector
!
! !INTERFACE:
SUBROUTINE ParExchangeVectorReal ( InComm, LenInVector, InVector,      &
                                   LenOutVector, OutVector )
!
! !USES:
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm            ! Communicator
   INTEGER, INTENT( IN )          :: LenInVector( * )  ! Length on each PE
   REAL(CPP_REAL8), INTENT( IN )  :: InVector( * )     ! The input buffer

! !OUTPUT PARAMETERS:
   INTEGER, INTENT( OUT )         :: LenOutVector( * ) ! Length on each PE
   REAL(CPP_REAL8), INTENT( OUT ) :: OutVector( * )    ! The output buffer

! !DESCRIPTION:
!
!  This routine exchanges vectors stored in compressed sparse row
!  (CSR) format with other PEs.  It first exchanges the lengths with
!  MPI\_Alltoall, then exchanges the actual vectors (which can differ
!  in size) in the manner of MPI\_AlltoallV, simulated here with
!  MPI\_Isend and MPI\_Recv for efficiency.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
!
! !REVISION HISTORY:
!    98.03.17   Sawyer     Creation from F77 version
!    98.03.30   Sawyer     Removed assumed shape arrays due to problems
!    99.01.18   Sawyer     Added barrier for safety
!    99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
!    99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
!    00.07.28   Sawyer     Implemented with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: nr, pe, icnt, Nsize, Iam, Ierr
   INTEGER :: Status(MPI_STATUS_SIZE)
   INTEGER, ALLOCATABLE :: Reqs(:), Stats(:)

   CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORREAL" )

   CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
   CALL MPI_COMM_RANK( InComm, Iam, Ierr )
   ALLOCATE( Reqs(Nsize) )
   ALLOCATE( Stats(Nsize*MPI_STATUS_SIZE) )

#if defined( MY_ALLTOALL )
!
! Hand-rolled all-to-all of the per-PE lengths
!
   DO pe = 0, Nsize-1
      ! Send each length with a non-blocking send (tag encodes sender)
      nr = LenInVector( pe + 1 )
      CALL MPI_ISEND( nr, 1, CPP_MPI_INTEGER, pe, Iam+3000,            &
                      InComm, Reqs( pe+1 ), Ierr )
   ENDDO
   DO pe = 0, Nsize - 1
      ! Receive the lengths with blocking MPI_Recv
      CALL MPI_RECV( nr, 1, CPP_MPI_INTEGER, pe, pe+3000,              &
                     InComm, Status, Ierr )
      LenOutVector(pe + 1) = nr
   ENDDO
   CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
#else
   ! Exchange the per-PE segment lengths in one collective
   CALL MPI_ALLTOALL( LenInVector,  1, CPP_MPI_INTEGER,                &
                      LenOutVector, 1, CPP_MPI_INTEGER,                &
                      InComm, Ierr )
#endif
!
! Post non-blocking sends of the packed segments to all processes
!
   icnt = 1
   DO pe = 0, Nsize-1
      nr = LenInVector( pe + 1 )
      IF ( nr .GT. 0 ) THEN
         CALL MPI_ISEND( InVector( icnt ), nr,                         &
                         CPP_MPI_REAL8, pe, Iam+2000,                  &
                         InComm, Reqs( pe+1 ), Ierr )
      ELSE
         ! Nothing to send to this PE: mark the slot as a null request
         Reqs( pe+1 ) = MPI_REQUEST_NULL
      ENDIF
      icnt = icnt + nr
   ENDDO
!
! Receive the incoming segments from all processes (blocking)
!
   icnt = 1
   DO pe = 0, Nsize - 1
      nr = LenOutVector(pe + 1)
      IF ( nr .GT. 0 ) THEN
         CALL MPI_RECV( OutVector( icnt ), nr,                         &
                        CPP_MPI_REAL8, pe, pe+2000,                    &
                        InComm, Status, Ierr )
      ENDIF
      icnt = icnt + nr
   ENDDO

   ! Complete all outstanding sends before releasing the buffers
   CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )

   DEALLOCATE( Reqs )
   DEALLOCATE( Stats )

   CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORREAL" )
   RETURN
!EOC
END SUBROUTINE ParExchangeVectorReal
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParExchangeVectorReal4 --- Exchange a sparse packed vector
!
! !INTERFACE:
SUBROUTINE ParExchangeVectorReal4 ( InComm, LenInVector, InVector,     &
                                    LenOutVector, OutVector )
!
! !USES:
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )          :: InComm            ! Communicator
   INTEGER, INTENT( IN )          :: LenInVector( * )  ! Length on each PE
   REAL(CPP_REAL4), INTENT( IN )  :: InVector( * )     ! The input buffer

! !OUTPUT PARAMETERS:
   INTEGER, INTENT( OUT )         :: LenOutVector( * ) ! Length on each PE
   REAL(CPP_REAL4), INTENT( OUT ) :: OutVector( * )    ! The output buffer

! !DESCRIPTION:
!
!  This routine exchanges vectors stored in compressed sparse row
!  (CSR) format with other PEs.  It is the REAL*4 counterpart of
!  ParExchangeVectorReal: lengths are exchanged with MPI\_Alltoall,
!  then the variable-size vectors are exchanged in the manner of
!  MPI\_AlltoallV, simulated with MPI\_Isend and MPI\_Recv.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
!
! !REVISION HISTORY:
!    98.03.17   Sawyer     Creation from F77 version
!    98.03.30   Sawyer     Removed assumed shape arrays due to problems
!    99.01.18   Sawyer     Added barrier for safety
!    99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
!    99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
!    00.07.28   Sawyer     Implemented with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: nr, pe, icnt, Nsize, Iam, Ierr
   INTEGER :: Status(MPI_STATUS_SIZE)
   INTEGER, ALLOCATABLE :: Reqs(:), Stats(:)

   CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORREAL4" )

   CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
   CALL MPI_COMM_RANK( InComm, Iam, Ierr )
   ALLOCATE( Reqs(Nsize) )
   ALLOCATE( Stats(Nsize*MPI_STATUS_SIZE) )

#if defined( MY_ALLTOALL )
!
! Hand-rolled all-to-all of the per-PE lengths
!
   DO pe = 0, Nsize-1
      ! Send each length with a non-blocking send (tag encodes sender)
      nr = LenInVector( pe + 1 )
      CALL MPI_ISEND( nr, 1, CPP_MPI_INTEGER, pe, Iam+3000,            &
                      InComm, Reqs( pe+1 ), Ierr )
   ENDDO
   DO pe = 0, Nsize - 1
      ! Receive the lengths with blocking MPI_Recv
      CALL MPI_RECV( nr, 1, CPP_MPI_INTEGER, pe, pe+3000,              &
                     InComm, Status, Ierr )
      LenOutVector(pe + 1) = nr
   ENDDO
   CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
#else
   ! Exchange the per-PE segment lengths in one collective
   CALL MPI_ALLTOALL( LenInVector,  1, CPP_MPI_INTEGER,                &
                      LenOutVector, 1, CPP_MPI_INTEGER,                &
                      InComm, Ierr )
#endif
!
! Post non-blocking sends of the packed segments to all processes
!
   icnt = 1
   DO pe = 0, Nsize-1
      nr = LenInVector( pe + 1 )
      IF ( nr .GT. 0 ) THEN
         CALL MPI_ISEND( InVector( icnt ), nr,                         &
                         CPP_MPI_REAL4, pe, Iam+2000,                  &
                         InComm, Reqs( pe+1 ), Ierr )
      ELSE
         ! Nothing to send to this PE: mark the slot as a null request
         Reqs( pe+1 ) = MPI_REQUEST_NULL
      ENDIF
      icnt = icnt + nr
   ENDDO
!
! Receive the incoming segments from all processes (blocking)
!
   icnt = 1
   DO pe = 0, Nsize - 1
      nr = LenOutVector(pe + 1)
      IF ( nr .GT. 0 ) THEN
         CALL MPI_RECV( OutVector( icnt ), nr,                         &
                        CPP_MPI_REAL4, pe, pe+2000,                    &
                        InComm, Status, Ierr )
      ENDIF
      icnt = icnt + nr
   ENDDO

   ! Complete all outstanding sends before releasing the buffers
   CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )

   DEALLOCATE( Reqs )
   DEALLOCATE( Stats )

   CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORREAL4" )
   RETURN
!EOC
END SUBROUTINE ParExchangeVectorReal4
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParExchangeVectorInt --- Exchange a sparse packed vector
!
! !INTERFACE:
SUBROUTINE ParExchangeVectorInt ( InComm, LenInVector, InVector,       &
                                  LenOutVector, OutVector )
!
! !USES:
   IMPLICIT NONE

! !INPUT PARAMETERS:
   INTEGER, INTENT( IN )  :: InComm            ! Communicator
   INTEGER, INTENT( IN )  :: LenInVector( * )  ! Length on each PE
   INTEGER, INTENT( IN )  :: InVector( * )     ! The input buffer

! !OUTPUT PARAMETERS:
   INTEGER, INTENT( OUT ) :: LenOutVector( * ) ! Length on each PE
   INTEGER, INTENT( OUT ) :: OutVector( * )    ! The output buffer

! !DESCRIPTION:
!
!  This routine exchanges vectors stored in compressed sparse row
!  (CSR) format with other PEs.  It is the INTEGER counterpart of
!  ParExchangeVectorReal: lengths are exchanged with MPI\_Alltoall,
!  then the variable-size vectors are exchanged in the manner of
!  MPI\_AlltoallV, simulated with MPI\_Isend and MPI\_Recv.
!
! !SYSTEM ROUTINES:
!  MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL, MPI_BARRIER
!
! !REVISION HISTORY:
!    98.03.17   Sawyer     Creation from F77 version
!    98.03.30   Sawyer     Removed assumed shape arrays due to problems
!    99.01.18   Sawyer     Added barrier for safety
!    99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
!    99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
!    00.07.28   Sawyer     Implemented with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
   INTEGER :: nr, pe, icnt, Nsize, Iam, Ierr
   INTEGER :: Status(MPI_STATUS_SIZE)
   INTEGER, ALLOCATABLE :: Reqs(:), Stats(:)

   CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORINT" )

   CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
   CALL MPI_COMM_RANK( InComm, Iam, Ierr )
   ALLOCATE( Reqs(Nsize) )
   ALLOCATE( Stats(Nsize*MPI_STATUS_SIZE) )

#if defined( MY_ALLTOALL )
!
! Hand-rolled all-to-all of the per-PE lengths
!
   DO pe = 0, Nsize-1
      ! Send each length with a non-blocking send (tag encodes sender)
      nr = LenInVector( pe + 1 )
      CALL MPI_ISEND( nr, 1,                                           &
                      MPI_INTEGER, pe, Iam+3000,                       &
                      InComm, Reqs( pe+1 ), Ierr )
   ENDDO
   DO pe = 0, Nsize - 1
      ! Receive the lengths with blocking MPI_Recv
      CALL MPI_RECV( nr, 1,                                            &
                     MPI_INTEGER, pe, pe+3000,                         &
                     InComm, Status, Ierr )
      LenOutVector(pe + 1) = nr
   ENDDO
   CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
#else
   ! Exchange the per-PE segment lengths in one collective
   CALL MPI_ALLTOALL( LenInVector,  1, CPP_MPI_INTEGER,                &
                      LenOutVector, 1, CPP_MPI_INTEGER,                &
                      InComm, Ierr )
#endif
!
! Post non-blocking sends of the packed segments to all processes
!
   icnt = 1
   DO pe = 0, Nsize-1
      nr = LenInVector( pe + 1 )
      IF ( nr .GT. 0 ) THEN
         CALL MPI_ISEND( InVector( icnt ), nr,                         &
                         CPP_MPI_INTEGER, pe, Iam+2000,                &
                         InComm, Reqs( pe+1 ), Ierr )
      ELSE
         ! Nothing to send to this PE: mark the slot as a null request
         Reqs( pe+1 ) = MPI_REQUEST_NULL
      ENDIF
      icnt = icnt + nr
   ENDDO
!
! Receive the incoming segments from all processes (blocking)
!
   icnt = 1
   DO pe = 0, Nsize - 1
      nr = LenOutVector(pe + 1)
      IF ( nr .GT. 0 ) THEN
         CALL MPI_RECV( OutVector( icnt ), nr,                         &
                        CPP_MPI_INTEGER, pe, pe+2000,                  &
                        InComm, Status, Ierr )
      ENDIF
      icnt = icnt + nr
   ENDDO

   ! Complete all outstanding sends before releasing the buffers
   CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
!
! WS 98.09.22 : This barrier needed to synchronize.  Why?
!
   CALL MPI_BARRIER( InComm, Ierr )

   DEALLOCATE( Reqs )
   DEALLOCATE( Stats )

   CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORINT" )
   RETURN
!EOC
END SUBROUTINE ParExchangeVectorInt
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollectiveBarrier --- Barrier: Simplest collective op.
!
! !INTERFACE:
SUBROUTINE ParCollectiveBarrier( InComm )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN ) :: InComm   ! Communicator to synchronize over
! !DESCRIPTION:
!
!   This routine performs a barrier only within the communicator InComm:
!   no PE in InComm returns until all PEs in InComm have entered.
!
! !REVISION HISTORY:
!   00.09.10   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER :: Ierror       ! MPI error code (not inspected; legacy convention)

    CALL MPI_Barrier( InComm, Ierror )
    RETURN
!EOC
END SUBROUTINE ParCollectiveBarrier
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective0D --- Perform global Collective of a scalar
!
! !INTERFACE:
SUBROUTINE ParCollective0D( InComm, Op, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )            :: InComm  ! Communicator
    INTEGER, INTENT( IN )            :: Op      ! BCSTOP or an MPI reduction op (SUMOP/MAXOP/MINOP)
! !INPUT/OUTPUT PARAMETERS:
    REAL(CPP_REAL8), INTENT( INOUT ) :: Var     ! partial Var in, reduced/broadcast Var out
! !DESCRIPTION:
!
!   This utility makes a collective operation over all processes in
!   communicator InComm: a broadcast from PE 0 when Op is BCSTOP,
!   otherwise an all-reduce with the MPI operation handle Op, leaving
!   the result in Var on every PE.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER         :: Ierror   ! MPI error code (not inspected)
    REAL(CPP_REAL8) :: Tmp      ! receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      ! Broadcast the value held by the root (PE 0) to all PEs
      CALL MPI_BCAST( Var, 1, CPP_MPI_REAL8, 0, InComm, Ierror )
    ELSE
      ! Op is an MPI reduction handle; result is available on all PEs
      CALL MPI_ALLREDUCE( Var, Tmp, 1, CPP_MPI_REAL8, &
                          Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective0D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective1D --- Perform component-wise global Collective of a vector
!
! !INTERFACE:
SUBROUTINE ParCollective1D( InComm, Op, Im, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )            :: InComm   ! Communicator
    INTEGER, INTENT( IN )            :: Op       ! BCSTOP or an MPI reduction op
    INTEGER, INTENT( IN )            :: Im       ! Size of 1-D array
! !INPUT/OUTPUT PARAMETERS:
    REAL(CPP_REAL8), INTENT( INOUT ) :: Var(Im)  ! partial Var in, Var out
! !DESCRIPTION:
!
!   This utility makes a component-wise collective operation over all
!   processes in communicator InComm: a broadcast from PE 0 when Op is
!   BCSTOP, otherwise an element-wise all-reduce with operation Op.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER         :: Ierror    ! MPI error code (not inspected)
    REAL(CPP_REAL8) :: Tmp(Im)   ! automatic receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, Im, CPP_MPI_REAL8, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, Im, CPP_MPI_REAL8, &
                          Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective1D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective1DReal4 --- Perform component-wise global Collective of a vector
!
! !INTERFACE:
SUBROUTINE ParCollective1DReal4( InComm, Op, Im, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )            :: InComm   ! Communicator
    INTEGER, INTENT( IN )            :: Op       ! BCSTOP or an MPI reduction op
    INTEGER, INTENT( IN )            :: Im       ! Size of 1-D array
! !INPUT/OUTPUT PARAMETERS:
    REAL(CPP_REAL4), INTENT( INOUT ) :: Var(Im)  ! partial Var in, Var out
! !DESCRIPTION:
!
!   Single-precision variant of ParCollective1D: component-wise
!   broadcast (Op = BCSTOP, root PE 0) or all-reduce over all
!   processes in communicator InComm.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER         :: Ierror    ! MPI error code (not inspected)
    REAL(CPP_REAL4) :: Tmp(Im)   ! automatic receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, Im, CPP_MPI_REAL4, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, Im, CPP_MPI_REAL4, &
                          Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective1DReal4
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective2D --- Perform component-wise collective operation
!
! !INTERFACE:
SUBROUTINE ParCollective2D( InComm, Op, Im, Jm, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )            :: InComm      ! Communicator
    INTEGER, INTENT( IN )            :: Op          ! BCSTOP or an MPI reduction op
    INTEGER, INTENT( IN )            :: Im          ! First dimension of 2-D array
    INTEGER, INTENT( IN )            :: Jm          ! Second dimension of 2-D array
! !INPUT/OUTPUT PARAMETERS:
    REAL(CPP_REAL8), INTENT( INOUT ) :: Var(Im,Jm)  ! partial Var in, Var out
! !DESCRIPTION:
!
!   This utility makes a component-wise collective operation over all
!   processes in communicator InComm: a broadcast from PE 0 when Op is
!   BCSTOP, otherwise an element-wise all-reduce with operation Op.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER         :: Ierror       ! MPI error code (not inspected)
    REAL(CPP_REAL8) :: Tmp(Im,Jm)   ! automatic receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      ! Arrays are contiguous, so the collective treats them as Im*Jm scalars
      CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_REAL8, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_REAL8, &
                          Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective2D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective2DReal4 --- Perform component-wise collective operation
!
! !INTERFACE:
SUBROUTINE ParCollective2DReal4( InComm, Op, Im, Jm, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )            :: InComm      ! Communicator
    INTEGER, INTENT( IN )            :: Op          ! BCSTOP or an MPI reduction op
    INTEGER, INTENT( IN )            :: Im          ! First dimension of 2-D array
    INTEGER, INTENT( IN )            :: Jm          ! Second dimension of 2-D array
! !INPUT/OUTPUT PARAMETERS:
    REAL(CPP_REAL4), INTENT( INOUT ) :: Var(Im,Jm)  ! partial Var in, Var out
! !DESCRIPTION:
!
!   Single-precision variant of ParCollective2D: component-wise
!   broadcast (Op = BCSTOP, root PE 0) or all-reduce over all
!   processes in communicator InComm.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER         :: Ierror       ! MPI error code (not inspected)
    REAL(CPP_REAL4) :: Tmp(Im,Jm)   ! automatic receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_REAL4, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_REAL4, &
                          Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective2DReal4
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective3D --- Perform component-wise global Collective of a vector
!
! !INTERFACE:
SUBROUTINE ParCollective3D( InComm, Op, Im, Jm, Lm, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )            :: InComm         ! Communicator
    INTEGER, INTENT( IN )            :: Op             ! BCSTOP or an MPI reduction op
    INTEGER, INTENT( IN )            :: Im             ! First dimension of 3-D array
    INTEGER, INTENT( IN )            :: Jm             ! Second dimension of 3-D array
    INTEGER, INTENT( IN )            :: Lm             ! Third dimension of 3-D array
! !INPUT/OUTPUT PARAMETERS:
    REAL(CPP_REAL8), INTENT( INOUT ) :: Var(Im,Jm,Lm)  ! partial Var in, Var out
! !DESCRIPTION:
!
!   This utility makes a component-wise collective operation over all
!   processes in communicator InComm: a broadcast from PE 0 when Op is
!   BCSTOP, otherwise an element-wise all-reduce with operation Op.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER         :: Ierror          ! MPI error code (not inspected)
    REAL(CPP_REAL8) :: Tmp(Im,Jm,Lm)   ! automatic receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, Im*Jm*Lm, CPP_MPI_REAL8, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm*Lm, CPP_MPI_REAL8, &
                          Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective3D
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective0DInt --- Perform global Collective of a scalar
!
! !INTERFACE:
SUBROUTINE ParCollective0DInt( InComm, Op, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )    :: InComm   ! Communicator
    INTEGER, INTENT( IN )    :: Op       ! BCSTOP or an MPI reduction op
! !INPUT/OUTPUT PARAMETERS:
    INTEGER, INTENT( INOUT ) :: Var      ! partial Var in, Var out
! !DESCRIPTION:
!
!   Integer variant of ParCollective0D: broadcast from PE 0 when Op is
!   BCSTOP, otherwise an all-reduce with operation Op, over all
!   processes in communicator InComm.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER :: Ierror   ! MPI error code (not inspected)
    INTEGER :: Tmp      ! receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, 1, CPP_MPI_INTEGER, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, 1, CPP_MPI_INTEGER, Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective0DInt
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective0DStr --- Perform global Collective of a string
!
! !INTERFACE:
SUBROUTINE ParCollective0DStr( InComm, Op, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )              :: InComm   ! Communicator
    INTEGER, INTENT( IN )              :: Op       ! Only BCSTOP is supported
! !INPUT/OUTPUT PARAMETERS:
    CHARACTER (LEN=*), INTENT( INOUT ) :: Var      ! string in, broadcast string out
! !DESCRIPTION:
!
!   Broadcasts a character string from PE 0 to all processes in
!   communicator InComm.  Reduction operations have no meaning for
!   strings; any Op other than BCSTOP only logs a warning and leaves
!   Var unchanged.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER :: Ierror, StrLen

    StrLen = LEN(Var)   ! declared length, including trailing blanks
    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, StrLen, MPI_CHARACTER, 0, InComm, Ierror )
    ELSE
      write(iulog,*) "global reduction of string not supported"
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective0DStr
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective1DStr --- Perform global Collective of a string
!
! !INTERFACE:
SUBROUTINE ParCollective1DStr( InComm, Op, Im, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )              :: InComm   ! Communicator
    INTEGER, INTENT( IN )              :: Op       ! Only BCSTOP is supported
    INTEGER, INTENT( IN )              :: Im       ! Number of strings in Var
! !INPUT/OUTPUT PARAMETERS:
    CHARACTER (LEN=*), INTENT( INOUT ) :: Var(:)   ! strings in, broadcast strings out
! !DESCRIPTION:
!
!   Broadcasts an array of Im character strings from PE 0 to all
!   processes in communicator InComm.  Reduction operations have no
!   meaning for strings; any Op other than BCSTOP only logs a warning
!   and leaves Var unchanged.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER :: Ierror, StrLen

    ! All elements share the declared length; broadcast as one block of
    ! Im*StrLen contiguous characters.
    StrLen = LEN(Var(1))
    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, Im*StrLen, MPI_CHARACTER, 0, InComm, Ierror )
    ELSE
      write(iulog,*) "global reduction of string not supported"
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective1DStr
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective1DInt --- Perform component-wise global
! collective operations of int vector
!
! !INTERFACE:
SUBROUTINE ParCollective1DInt( InComm, Op, Im, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )    :: InComm   ! Communicator
    INTEGER, INTENT( IN )    :: Op       ! BCSTOP or an MPI reduction op
    INTEGER, INTENT( IN )    :: Im       ! Size of 1-D array
! !INPUT/OUTPUT PARAMETERS:
    INTEGER, INTENT( INOUT ) :: Var(Im)  ! partial Var in, Var out
! !DESCRIPTION:
!
!   Integer variant of ParCollective1D: component-wise broadcast
!   (Op = BCSTOP, root PE 0) or all-reduce over all processes in
!   communicator InComm.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER :: Ierror    ! MPI error code (not inspected)
    INTEGER :: Tmp(Im)   ! automatic receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, Im, CPP_MPI_INTEGER, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, Im, CPP_MPI_INTEGER, Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective1DInt
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: ParCollective2DInt --- Perform component-wise collective op.
!
! !INTERFACE:
SUBROUTINE ParCollective2DInt( InComm, Op, Im, Jm, Var )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )    :: InComm      ! Communicator
    INTEGER, INTENT( IN )    :: Op          ! BCSTOP or an MPI reduction op
    INTEGER, INTENT( IN )    :: Im          ! First dimension of 2-D array
    INTEGER, INTENT( IN )    :: Jm          ! Second dimension of 2-D array
! !INPUT/OUTPUT PARAMETERS:
    INTEGER, INTENT( INOUT ) :: Var(Im,Jm)  ! partial Var in, Var out
! !DESCRIPTION:
!
!   Integer variant of ParCollective2D: component-wise broadcast
!   (Op = BCSTOP, root PE 0) or all-reduce over all processes in
!   communicator InComm.
!
! !REVISION HISTORY:
!   00.08.07   Sawyer     Creation
!
!EOP
!---------------------------------------------------------------------
!BOC
    INTEGER :: Ierror       ! MPI error code (not inspected)
    INTEGER :: Tmp(Im,Jm)   ! automatic receive buffer for the reduction

    IF ( Op .EQ. BCSTOP ) THEN
      CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_INTEGER, 0, InComm, Ierror )
    ELSE
      CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_INTEGER, &
                          Op, InComm, Ierror )
      Var = Tmp
    ENDIF
    RETURN
!EOC
END SUBROUTINE ParCollective2DInt
!-----------------------------------------------------------------------
# ifdef _SMEMORY
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParExchangeLength --- Exchange a sparse packed vector
!
! !INTERFACE:
SUBROUTINE ParExchangeLength ( InComm, LenInVector, LenOutVector )
! !USES:
    IMPLICIT NONE
! !INPUT PARAMETERS:
    INTEGER, INTENT( IN )  :: InComm             ! Communicator
    INTEGER, INTENT( IN )  :: LenInVector( * )   ! Length destined for each PE
! !OUTPUT PARAMETERS:
    INTEGER, INTENT( OUT ) :: LenOutVector( * )  ! Length received from each PE
! !DESCRIPTION:
!
!   This routine exchanges one integer length with every PE in InComm
!   (the length-exchange phase of a CSR vector transpose): entry pe+1
!   of LenOutVector receives entry Iam+1 of LenInVector on PE pe.
!   By default this is a single MPI\_Alltoall; the MY\_ALLTOALL branch
!   simulates it with MPI\_Isend / MPI\_Recv.
!
! !SYSTEM ROUTINES:
!   MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
!
! !REVISION HISTORY:
!   98.03.17   Sawyer     Creation from F77 version
!   98.03.30   Sawyer     Removed assumed shape arrays due to problems
!   99.01.18   Sawyer     Added barrier for safety
!   99.03.08   Sawyer     USE_SHMEM version for CRAY only; untested
!   99.06.01   Sawyer     USE_SHMEM version revised per comments from Tom
!   00.07.28   Sawyer     Implemented with shared memory arenas
!
!EOP
!-----------------------------------------------------------------------
!BOC
!
! !LOCAL VARIABLES:
    INTEGER :: nr, pe, Nsize, Iam, Ierr
    INTEGER :: Status(MPI_STATUS_SIZE)
    INTEGER, ALLOCATABLE :: Reqs(:), Stats(:)

    CPP_ENTER_PROCEDURE( "PAREXCHANGELENGTH" )
    CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
    CALL MPI_COMM_RANK( InComm, Iam, Ierr )
#if defined( MY_ALLTOALL )
    ! Request/status workspace is needed only by the hand-rolled alltoall;
    ! allocating it here (not unconditionally) avoids leaking it in the
    ! MPI_ALLTOALL branch, which previously never deallocated it.
    allocate (Reqs(Nsize))
    allocate (Stats(Nsize*MPI_STATUS_SIZE))
    DO pe = 0, Nsize-1
      !
      ! Post nonblocking sends directly from LenInVector: the buffer of an
      ! MPI_ISEND must not be modified or reused until MPI_WAITALL.  (The
      ! previous code sent the local scalar nr, which the receive loop
      ! below overwrote before the sends completed.)
      !
      CALL MPI_ISEND( LenInVector( pe+1 ), 1,                        &
                      MPI_INTEGER, pe, Iam+3000,                     &
                      InComm, Reqs( pe+1 ), Ierr )
    ENDDO
    DO pe = 0, Nsize - 1
      !
      ! Receive the lengths with MPI_Recv.  Now we are blocking.
      !
      CALL MPI_RECV( nr, 1,                                          &
                     MPI_INTEGER, pe, pe+3000,                       &
                     InComm, Status, Ierr )
      LenOutVector(pe + 1) = nr
    ENDDO
    CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
    deallocate (Reqs)
    deallocate (Stats)
#else
    CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER,              &
                       LenOutVector, 1, CPP_MPI_INTEGER,             &
                       InComm, Ierr )
#endif
    ! Historical safety barrier (see ParExchangeVectorInt); kept for
    ! backward-compatible timing behavior.
    CALL MPI_BARRIER( InComm, Ierr )
    CPP_LEAVE_PROCEDURE( "PAREXCHANGELENGTH" )
    RETURN
!EOC
END SUBROUTINE ParExchangeLength
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParCalcInfoDecompToGhost --- calculates info about the pattern
!
! !INTERFACE:
subroutine ParCalcInfoDecompToGhost( InComm, DA, GB, Info )
!
! !USES:
    USE decompmodule, ONLY : DecompType, DecompInfo, DecompGlobalToLocal
    USE ghostmodule,  ONLY : GhostType, GhostInfo
    IMPLICIT NONE
! !INPUT PARAMETERS:
    integer, intent(in)             :: InComm   ! communicator
    type(DecompType), intent(in)    :: DA       ! Source Decomp Desc
    type(GhostType),  intent(in)    :: GB       ! Destination Ghost Desc
! !OUTPUT PARAMETERS:
    type (ParInfoType), intent(out) :: Info     ! Info structure
!
! !DESCRIPTION:
!   This routine calculates segment/neighbor counts for a communication
!   pattern that transforms from a decomposition to a ghosted
!   decomposition, i.e., a so-called "transpose".  It mirrors the
!   segment-counting algorithm of ParPatternDecompToGhost without
!   building the pattern itself.
!
! !SYSTEM ROUTINES:
!   MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
!
! !REVISION HISTORY:
!   07.09.04   Dennis    Creation based on algorithm in ParPatternDecompToGhost
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
    integer :: nTags, oldpe, oldlocal, sTag, eTag, nCount
    integer :: j, pe, local, tag, ierr, iam, npes
    integer :: npesA, npesB, tmpA, tmp1B, tmp2B, tmp3B
    integer, allocatable :: sCount(:), rCount(:)

    CPP_ENTER_PROCEDURE( "PARCALCINFODECOMPTOGHOST" )
    call DecompInfo( DA, npesA, tmpA )                 ! npesA: PEs in source decomp
    call GhostInfo( GB, npesB, tmp1B, tmp2B, tmp3B )   ! npesB: PEs in target ghost
    call MPI_COMM_SIZE( InComm, npes, ierr )
    call MPI_COMM_RANK( InComm, iam, ierr )
    allocate( sCount(npes), rCount(npes) )
    sCount = 0
    rCount = 0
    if ( iam .lt. npesB ) then
      ! Parse through all the tags in the local (ghosted) segment
      nTags = SIZE( GB%Local%Head(iam+1)%StartTags )
      do j = 1, nTags
        oldpe    = -1
        oldlocal = 0
        sTag = GB%Local%Head(iam+1)%StartTags(j)
        eTag = GB%Local%Head(iam+1)%EndTags(j)
        do tag = sTag, eTag
          if ( tag > 0 ) then
            !
            ! Determine the index and PE of this entry on A
            !
            call DecompGlobalToLocal( DA, tag, local, pe )
            !
            ! A new segment starts whenever the sending PE changes or the
            ! local index on A is no longer contiguous
            !
            if ( pe /= oldpe .or. local /= oldlocal+1 ) then
              sCount(pe+1) = sCount(pe+1) + 1
            endif
            oldpe    = pe        ! Update PE
            oldlocal = local     ! Update local index
          endif
        enddo
      enddo
    endif
    ! Exchange segment counts to determine how many will be received
    call ParExchangeLength( InComm, sCount, rCount )
    ! Record the pattern statistics
    Info%numSendSeg   = SUM( sCount )
    Info%numSendNeigh = COUNT( sCount > 0 )
    Info%numRecvSeg   = SUM( rCount )
    Info%numRecvNeigh = COUNT( rCount > 0 )
    nCount = MAX( Info%numSendSeg, Info%numRecvSeg )
    call MPI_ALLREDUCE( nCount, Info%maxNumSeg, 1, INT4, MPI_MAX, InComm, ierr )
    deallocate( sCount, rCount )
    CPP_LEAVE_PROCEDURE( "PARCALCINFODECOMPTOGHOST" )
    RETURN
!EOC
end subroutine ParCalcInfoDecompToGhost
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParCalcInfoDecompToDecomp --- calculates info about the pattern
!
! !INTERFACE:
subroutine ParCalcInfoDecompToDecomp( InComm, DA, DB, Info )
!
! !USES:
    USE decompmodule, ONLY : DecompType, DecompInfo, DecompGlobalToLocal
    IMPLICIT NONE
! !INPUT PARAMETERS:
    integer, intent(in)             :: InComm   ! communicator
    type(DecompType), intent(in)    :: DA       ! Source Decomp Desc
    type(DecompType), intent(in)    :: DB       ! Destination Decomp Desc
! !OUTPUT PARAMETERS:
    type (ParInfoType), intent(out) :: Info     ! Info structure
!
! !DESCRIPTION:
!   This routine calculates segment/neighbor counts for a communication
!   pattern that transforms from one decomposition to another, i.e., a
!   so-called "transpose".  It mirrors the segment-counting algorithm
!   of ParPatternDecompToDecomp without building the pattern itself.
!
! !SYSTEM ROUTINES:
!   MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
!
! !REVISION HISTORY:
!   07.09.04   Dennis    Creation based on algorithm in ParPatternDecompToDecomp
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
    integer :: nCount, npes, iam, ierr
    integer :: nTags, oldpe, sTag, eTag
    integer :: j, pe, local, tag, tmpA, tmpB, npesA, npesB
    integer, allocatable :: sCount(:), rCount(:)

    CPP_ENTER_PROCEDURE( "PARCALCINFODECOMPTODECOMP" )
    call DecompInfo( DA, npesA, tmpA )   ! npesA: PEs in source decomp
    call DecompInfo( DB, npesB, tmpB )   ! npesB: PEs in target decomp
    call MPI_COMM_SIZE( InComm, npes, ierr )
    call MPI_COMM_RANK( InComm, iam, ierr )
    allocate( sCount(npes), rCount(npes) )
    sCount = 0
    rCount = 0
    if ( iam .lt. npesB ) then
      ! Parse through all the tags in the local segment
      nTags = SIZE( DB%Head(iam+1)%StartTags )
      do j = 1, nTags
        oldpe = -1
        sTag  = DB%Head(iam+1)%StartTags(j)
        eTag  = DB%Head(iam+1)%EndTags(j)
        do tag = sTag, eTag
          !
          ! Determine the index and PE of this entry on A
          !
          call DecompGlobalToLocal( DA, tag, local, pe )
          !
          ! A new segment starts whenever the sending PE changes (tags in
          ! a run are globally contiguous, so only the PE break matters)
          !
          if ( pe /= oldpe ) then
            oldpe = pe
            sCount(pe+1) = sCount(pe+1) + 1
          endif
        enddo
      enddo
    endif
    ! Exchange segment counts to determine how many will be received
    call ParExchangeLength( InComm, sCount, rCount )
    ! Record the pattern statistics
    Info%numSendSeg   = SUM( sCount )
    Info%numSendNeigh = COUNT( sCount > 0 )
    Info%numRecvSeg   = SUM( rCount )
    Info%numRecvNeigh = COUNT( rCount > 0 )
    nCount = MAX( Info%numSendSeg, Info%numRecvSeg )
    call MPI_ALLREDUCE( nCount, Info%maxNumSeg, 1, INT4, MPI_MAX, InComm, ierr )
    deallocate( sCount, rCount )
    CPP_LEAVE_PROCEDURE( "PARCALCINFODECOMPTODECOMP" )
    RETURN
!EOC
end subroutine ParCalcInfoDecompToDecomp
!--------------------------------------------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParCalcInfoGhostToDecomp --- calculates info about the pattern
!
! !INTERFACE:
subroutine ParCalcInfoGhostToDecomp( InComm, GA, DB, Info )
!
! !USES:
    USE decompmodule, ONLY : DecompType, DecompInfo, DecompGlobalToLocal
    USE ghostmodule,  ONLY : GhostType, GhostInfo
    IMPLICIT NONE
! !INPUT PARAMETERS:
    integer, intent(in)             :: InComm   ! communicator
    type(GhostType),  intent(in)    :: GA       ! Source Ghost Desc
    type(DecompType), intent(in)    :: DB       ! Destination Decomp Desc
! !OUTPUT PARAMETERS:
    type (ParInfoType), intent(out) :: Info     ! Info structure
!
! !DESCRIPTION:
!   This routine calculates segment/neighbor counts for a communication
!   pattern that transforms from a ghosted decomposition to a plain
!   decomposition, i.e., a so-called "transpose".  It mirrors the
!   segment-counting algorithm of ParPatternGhostToDecomp without
!   building the pattern itself.
!
! !SYSTEM ROUTINES:
!   MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
!
! !REVISION HISTORY:
!   07.09.04   Dennis    Creation based on algorithm in ParPatternGhostToDecomp
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
    integer :: nTags, oldpe, oldlocal, sTag, eTag
    integer :: npes, nCount, iam, ierr
    integer :: j, pe, local, tag, npesA, npesB, tmpB, tmp1A, tmp2A, tmp3A
    integer, allocatable :: sCount(:), rCount(:)

    CPP_ENTER_PROCEDURE( "PARCALCINFOGHOSTTODECOMP" )
    call GhostInfo( GA, npesA, tmp1A, tmp2A, tmp3A )   ! npesA: PEs in source ghost
    call DecompInfo( DB, npesB, tmpB )                 ! npesB: PEs in target decomp
    call MPI_COMM_SIZE( InComm, npes, ierr )
    call MPI_COMM_RANK( InComm, iam, ierr )
    allocate( sCount(npes), rCount(npes) )
    sCount = 0
    rCount = 0
    if ( iam .lt. npesB ) then
      ! Parse through all the tags in the local segment
      nTags = SIZE( DB%Head(iam+1)%StartTags )
      do j = 1, nTags
        oldpe    = -1
        oldlocal = 0
        sTag = DB%Head(iam+1)%StartTags(j)
        eTag = DB%Head(iam+1)%EndTags(j)
        do tag = sTag, eTag
          !
          ! Determine the index and PE of this entry on A (GA's underlying
          ! decomposition)
          !
          call DecompGlobalToLocal( GA%Decomp, tag, local, pe )
          !
          ! A new segment starts whenever the sending PE changes or the
          ! local index on A is no longer contiguous
          !
          if ( pe /= oldpe .or. local /= oldlocal+1 ) then
            sCount(pe+1) = sCount(pe+1) + 1
          endif
          oldpe    = pe
          oldlocal = local
        enddo
      enddo
    endif
    ! Exchange segment counts to determine how many will be received
    call ParExchangeLength( InComm, sCount, rCount )
    ! Record the pattern statistics
    Info%numSendSeg   = SUM( sCount )
    Info%numSendNeigh = COUNT( sCount > 0 )
    Info%numRecvSeg   = SUM( rCount )
    Info%numRecvNeigh = COUNT( rCount > 0 )
    nCount = MAX( Info%numSendSeg, Info%numRecvSeg )
    call MPI_ALLREDUCE( nCount, Info%maxNumSeg, 1, INT4, MPI_MAX, InComm, ierr )
    deallocate( sCount, rCount )
    CPP_LEAVE_PROCEDURE( "PARCALCINFOGHOSTTODECOMP" )
    RETURN
!EOC
end subroutine ParCalcInfoGhostToDecomp
!-----------------------------------------------------------------------
!-----------------------------------------------------------------------
!BOP
! !IROUTINE: ParCalcInfoGhostToGhost --- calculates info about the pattern
!
! !INTERFACE:
subroutine ParCalcInfoGhostToGhost( InComm, GA, GB, Info )
!
! !USES:
    USE decompmodule, ONLY : DecompGlobalToLocal
    USE ghostmodule,  ONLY : GhostType, GhostInfo
    IMPLICIT NONE
! !INPUT PARAMETERS:
    integer, intent(in)             :: InComm   ! communicator
    type(GhostType), intent(in)     :: GA       ! Source Ghost Desc
    type(GhostType), intent(in)     :: GB       ! Destination Ghost Desc
! !OUTPUT PARAMETERS:
    type (ParInfoType), intent(out) :: Info     ! Info structure
!
! !DESCRIPTION:
!   This routine calculates segment/neighbor counts for a communication
!   pattern that transforms from one ghosted decomposition to another,
!   i.e., a so-called "transpose".  It mirrors the segment-counting
!   algorithm of ParPatternGhostToGhost without building the pattern
!   itself.
!
! !SYSTEM ROUTINES:
!   MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
!
! !REVISION HISTORY:
!   07.09.04   Dennis    Creation based on algorithm in ParPatternGhostToGhost
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
    integer :: nTags, oldpe, oldlocal, sTag, eTag, ierr, nCount
    integer :: j, pe, local, tag, npes, iam, npesA, npesB
    integer :: tmp1A, tmp2A, tmp3A, tmp1B, tmp2B, tmp3B
    integer, allocatable :: sCount(:), rCount(:)

    CPP_ENTER_PROCEDURE( "PARCALCINFOGHOSTTOGHOST" )
    call GhostInfo( GA, npesA, tmp1A, tmp2A, tmp3A )   ! npesA: PEs in source ghost
    call GhostInfo( GB, npesB, tmp1B, tmp2B, tmp3B )   ! npesB: PEs in target ghost
    call MPI_COMM_SIZE( InComm, npes, ierr )
    call MPI_COMM_RANK( InComm, iam, ierr )
    allocate( sCount(npes), rCount(npes) )
    sCount = 0
    rCount = 0
    if ( iam .lt. npesB ) then
      ! Parse through all the tags in the local (ghosted) segment
      nTags = SIZE( GB%Local%Head(iam+1)%StartTags )
      do j = 1, nTags
        oldpe    = -1
        oldlocal = 0
        sTag = GB%Local%Head(iam+1)%StartTags(j)
        eTag = GB%Local%Head(iam+1)%EndTags(j)
        do tag = sTag, eTag
          if ( tag > 0 ) then
            !
            ! Determine the index and PE of this entry on A (GA's
            ! underlying decomposition)
            !
            call DecompGlobalToLocal( GA%Decomp, tag, local, pe )
            !
            ! A new segment starts whenever the sending PE changes or the
            ! local index on A is no longer contiguous
            !
            if ( pe /= oldpe .or. local /= oldlocal+1 ) then
              sCount(pe+1) = sCount(pe+1) + 1
            endif
            oldpe    = pe
            oldlocal = local
          endif
        enddo
      enddo
    endif
    ! Exchange segment counts to determine how many will be received
    call ParExchangeLength( InComm, sCount, rCount )
    ! Record the pattern statistics
    Info%numSendSeg   = SUM( sCount )
    Info%numSendNeigh = COUNT( sCount > 0 )
    Info%numRecvSeg   = SUM( rCount )
    Info%numRecvNeigh = COUNT( rCount > 0 )
    nCount = MAX( Info%numSendSeg, Info%numRecvSeg )
    call MPI_ALLREDUCE( nCount, Info%maxNumSeg, 1, INT4, MPI_MAX, InComm, ierr )
    deallocate( sCount, rCount )
    CPP_LEAVE_PROCEDURE( "PARCALCINFOGHOSTTOGHOST" )
    RETURN
!EOC
end subroutine ParCalcInfoGhostToGhost
# endif
#endif
END MODULE parutilitiesmodule