!s||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| !BOP ! !MODULE: ice_distribution module ice_distribution 6,9 ! ! !DESCRIPTION: ! This module provides data types and routines for distributing ! blocks across processors. ! ! !REVISION HISTORY: ! SVN:$Id: ice_distribution.F90 118 2008-04-08 20:57:17Z eclare $ ! ! author: Phil Jones, LANL ! Oct. 2004: Adapted from POP by William H. Lipscomb, LANL ! Jan. 2008: Elizabeth Hunke updated to new POP infrastructure ! ! !USES: use ice_kinds_mod use ice_communicate use ice_domain_size use ice_blocks use ice_exit use ice_fileunits, only: nu_diag, nu_timing, ice_stdout use ice_spacecurve use ice_broadcast ! use ice_probability, only: EstimateCost, BuildProbabilityStats, WoriteProbabilityStats use ice_probability_tools implicit none private save ! !PUBLIC TYPES: type, public :: distrb ! distribution data type integer (int_kind) :: & nprocs ,&! number of processors in this dist communicator ,&! communicator to use in this dist numLocalBlocks ! number of blocks distributed to this ! local processor integer (int_kind), dimension(:), pointer :: & blockLocation ,&! processor location for all blocks blockLocalID ,&! local block id for all blocks blockGlobalID ! global block id for each local block integer (int_kind), dimension(:), pointer :: blockCnt integer (int_kind), dimension(:,:), pointer :: blockIndex end type integer (int_kind), public, parameter :: & ! types of blocks: lndType = 0, & ! Land icefreeType = 1, & ! ice free (ocean only) iceType = 2 ! sea ice ! integer, parameter :: numCoeff = 5 ! !PUBLIC MEMBER FUNCTIONS: public :: create_distribution, & ice_distributionGet, & ice_distributionGetBlockLoc, & ice_distributionGetBlockID, & create_local_block_ids ! !PUBLIC DATA MEMBERS: character (char_len), public :: & processor_shape ! 'square-pop' (approx) POP default config ! 'square-ice' like square-pop but better for ice ! 'slenderX1' (NPX x 1) ! 'slenderX2' (NPX x 2) !EOP !BOC !EOC !*********************************************************************** contains !*********************************************************************** !BOP ! !IROUTINE: create_distribution ! !INTERFACE: function create_distribution(dist_type, nprocs, maxBlock, work_per_block, prob_per_block, blockType, bStats, FixMaxBlock) 3,8 ! !DESCRIPTION: ! This routine determines the distribution of blocks across processors ! by call the appropriate subroutine based on distribution type ! requested. Currently three distributions are supported: ! 2-d Cartesian distribution (cartesian), a load-balanced ! distribution using a rake algorithm based on an input amount of work ! per block, and a space-filling-curve algorithm. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: character (*), intent(in) :: & dist_type ! method for distributing blocks ! either cartesian or rake integer (int_kind), intent(in) :: & nprocs ! number of processors in this distribution integer (int_kind), intent(in) :: maxBlock ! maximum number of blocks to use integer (int_kind), dimension(:), intent(in) :: & work_per_block ! amount of work per block real (dbl_kind), dimension(:), intent(in) :: & prob_per_block ! probability that block contains sea-ice integer (int_kind), dimension(:), intent(in) :: & blockType ! type of block real (dbl_kind), dimension(:,:), intent(in) :: bStats ! block statistics logical, intent(in) :: FixMaxBlock ! !OUTPUT PARAMETERS: type (distrb) :: & create_distribution ! resulting structure describing ! distribution of blocks !EOP !BOC !---------------------------------------------------------------------- ! ! select the appropriate distribution type ! !---------------------------------------------------------------------- select case (trim(dist_type)) case('cartesian') create_distribution = create_distrb_cart(nprocs, work_per_block) case('rake') create_distribution = create_distrb_rake(nprocs, work_per_block) case('spacecurve') !DBG print *,'before call to create_distrb_spacecurve' create_distribution = create_distrb_spacecurve(nprocs, maxBlock, & work_per_block,prob_per_block,blockType,bStats, FixMaxBlock ) !DBG stop 'create_distribution: after call to create_distrb_spacecurve' case default call abort_ice('ice distribution: unknown distribution type') end select if(my_task == master_task) then write(nu_diag,*) 'Active processors: ',MAXVAL(create_distribution%blockLocation) endif !DBG print *,'end of create_distribution' !----------------------------------------------------------------------- !EOC end function create_distribution !*********************************************************************** !BOP ! !IROUTINE: create_local_block_ids ! !INTERFACE: subroutine create_local_block_ids(block_ids, distribution) 3 ! !DESCRIPTION: ! This routine determines which blocks in an input distribution are ! located on the local processor and creates an array of block ids ! for all local blocks. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (distrb), intent(in) :: & distribution ! input distribution for which local ! blocks required ! !OUTPUT PARAMETERS: integer (int_kind), dimension(:), pointer :: & block_ids ! array of block ids for every block ! that resides on the local processor !EOP !BOC !----------------------------------------------------------------------- ! ! local variables ! !----------------------------------------------------------------------- integer (int_kind) :: & n, bid, bcount ! dummy counters logical (log_kind) :: dbug !----------------------------------------------------------------------- ! ! first determine number of local blocks to allocate array ! !----------------------------------------------------------------------- bcount = 0 do n=1,size(distribution%blockLocation) if (distribution%blockLocation(n) == my_task+1) bcount = bcount + 1 end do if (bcount > 0) allocate(block_ids(bcount)) !----------------------------------------------------------------------- ! ! now fill array with proper block ids ! !----------------------------------------------------------------------- ! dbug = .true. dbug = .false. if (bcount > 0) then do n=1,size(distribution%blockLocation) if (distribution%blockLocation(n) == my_task+1) then block_ids(distribution%blockLocalID(n)) = n if (dbug) then write(nu_diag,*) 'block id, proc, local_block: ', & block_ids(distribution%blockLocalID(n)), & distribution%blockLocation(n), & distribution%blockLocalID(n) endif endif end do endif !EOC end subroutine create_local_block_ids !*********************************************************************** !BOP ! !IROUTINE: proc_decomposition ! !INTERFACE: subroutine proc_decomposition(nprocs, nprocs_x, nprocs_y) 4,3 ! !DESCRIPTION: ! This subroutine attempts to find an optimal (nearly square) ! 2d processor decomposition for a given number of processors. ! ! !REVISION HISTORY: ! same as module ! ! !USES: use ice_domain_size ! !INPUT PARAMETERS: integer (int_kind), intent(in) :: & nprocs ! total number or processors ! !OUTPUT PARAMETERS: integer (int_kind), intent(out) :: & nprocs_x, nprocs_y ! number of procs in each dimension !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (int_kind) :: & iguess, jguess ! guesses for nproc_x,y real (real_kind) :: & square ! square root of nprocs !---------------------------------------------------------------------- ! ! start with an initial guess ! !---------------------------------------------------------------------- square = sqrt(real(nprocs,kind=real_kind)) nprocs_x = 0 nprocs_y = 0 if (processor_shape == 'square-pop') then ! make as square as possible iguess = nint(square) jguess = nprocs/iguess elseif (processor_shape == 'square-ice') then ! better for bipolar ice jguess = nint(square) iguess = nprocs/jguess elseif (processor_shape == 'slenderX1') then ! 1 proc in y direction jguess = 1 iguess = nprocs/jguess else ! 2 processors in y direction jguess = min(2, nprocs) iguess = nprocs/jguess endif !---------------------------------------------------------------------- ! ! try various decompositions to find the best ! !---------------------------------------------------------------------- proc_loop: do if (processor_shape == 'square-pop') then jguess = nprocs/iguess else iguess = nprocs/jguess endif if (iguess*jguess == nprocs) then ! valid decomp !*** if the blocks can be evenly distributed, it is a !*** good decomposition if (mod(nblocks_x,iguess) == 0 .and. & mod(nblocks_y,jguess) == 0) then nprocs_x = iguess nprocs_y = jguess exit proc_loop !*** if the blocks can be evenly distributed in a !*** transposed direction, it is a good decomposition else if (mod(nblocks_x,jguess) == 0 .and. & mod(nblocks_y,iguess) == 0) then nprocs_x = jguess nprocs_y = iguess exit proc_loop !*** A valid decomposition, but keep searching for !*** a better one else if (nprocs_x == 0) then nprocs_x = iguess nprocs_y = jguess endif if (processor_shape == 'square-pop') then iguess = iguess - 1 if (iguess == 0) then exit proc_loop else cycle proc_loop endif else jguess = jguess - 1 if (jguess == 0) then exit proc_loop else cycle proc_loop endif endif endif else ! invalid decomp - keep trying if (processor_shape == 'square-pop') then iguess = iguess - 1 if (iguess == 0) then exit proc_loop else cycle proc_loop endif else jguess = jguess - 1 if (jguess == 0) then exit proc_loop else cycle proc_loop endif endif endif end do proc_loop if (nprocs_x == 0) then call abort_ice('ice: Unable to find 2d processor config') endif if (my_task == master_task) then write(nu_diag,'(a23,i4,a3,i4)') ' Processors (X x Y) = ', & nprocs_x,' x ',nprocs_y endif !---------------------------------------------------------------------- !EOC end subroutine proc_decomposition !********************************************************************** !BOP ! !IROUTINE: ice_distributionDestroy ! !INTERFACE: subroutine ice_distributionDestroy(distribution) 1 ! !DESCRIPTION: ! This routine destroys a defined distribution by deallocating ! all memory associated with the distribution. ! ! !REVISION HISTORY: ! same as module ! !INPUT/OUTPUT PARAMETERS: type (distrb), intent(inout) :: & distribution ! distribution to destroy ! !OUTPUT PARAMETERS: !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (int_kind) :: istat ! status flag for deallocate !---------------------------------------------------------------------- ! ! reset scalars ! !---------------------------------------------------------------------- distribution%nprocs = 0 distribution%communicator = 0 distribution%numLocalBlocks = 0 !---------------------------------------------------------------------- ! ! deallocate arrays ! !---------------------------------------------------------------------- deallocate(distribution%blockLocation, stat=istat) deallocate(distribution%blockLocalID , stat=istat) deallocate(distribution%blockGlobalID, stat=istat) deallocate(distribution%blockCnt, stat=istat) !----------------------------------------------------------------------- !EOC end subroutine ice_distributionDestroy !*********************************************************************** !BOP ! !IROUTINE: ice_distributionGet ! !INTERFACE: subroutine ice_distributionGet(distribution,& 23,3 nprocs, communicator, numLocalBlocks, & blockLocation, blockLocalID, blockGlobalID) ! !DESCRIPTION: ! This routine extracts information from a distribution. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (distrb), intent(in) :: & distribution ! input distribution for which information ! is requested ! !OUTPUT PARAMETERS: integer (int_kind), intent(out), optional :: & nprocs ,&! number of processors in this dist communicator ,&! communicator to use in this dist numLocalBlocks ! number of blocks distributed to this ! local processor integer (int_kind), dimension(:), pointer, optional :: & blockLocation ,&! processor location for all blocks blockLocalID ,&! local block id for all blocks blockGlobalID ! global block id for each local block !EOP !BOC !----------------------------------------------------------------------- ! ! depending on which optional arguments are present, extract the ! requested info ! !----------------------------------------------------------------------- if (present(nprocs)) nprocs = distribution%nprocs if (present(communicator)) communicator = distribution%communicator if (present(numLocalBlocks)) numLocalBlocks = distribution%numLocalBlocks if (present(blockLocation)) then if (associated(distribution%blockLocation)) then blockLocation => distribution%blockLocation else call abort_ice( & 'ice_distributionGet: blockLocation not allocated') return endif endif if (present(blockLocalID)) then if (associated(distribution%blockLocalID)) then blockLocalID = distribution%blockLocalID else call abort_ice( & 'ice_distributionGet: blockLocalID not allocated') return endif endif if (present(blockGlobalID)) then if (associated(distribution%blockGlobalID)) then blockGlobalID = distribution%blockGlobalID else call abort_ice( & 'ice_distributionGet: blockGlobalID not allocated') return endif endif !----------------------------------------------------------------------- !EOC end subroutine ice_distributionGet !*********************************************************************** !BOP ! !IROUTINE: ice_distributionGetBlockLoc ! !INTERFACE: subroutine ice_distributionGetBlockLoc(distribution, blockID, & 30,1 processor, localID) ! !DESCRIPTION: ! Given a distribution of blocks and a global block ID, return ! the processor and local index for the block. A zero for both ! is returned in the case that the block has been eliminated from ! the distribution (i.e. has no active points). ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (distrb), intent(in) :: & distribution ! input distribution for which information ! is requested integer (int_kind), intent(in) :: & blockID ! global block id for which location requested ! !OUTPUT PARAMETERS: integer (int_kind), intent(out) :: & processor, &! processor on which block resides localID ! local index for this block on this proc !EOP !BOC !----------------------------------------------------------------------- ! ! check for valid blockID ! !----------------------------------------------------------------------- if (blockID < 0 .or. blockID > nblocks_tot) then call abort_ice( & 'ice_distributionGetBlockLoc: invalid block id') return endif !----------------------------------------------------------------------- ! ! extract the location from the distribution data structure ! !----------------------------------------------------------------------- processor = distribution%blockLocation(blockID) localID = distribution%blockLocalID (blockID) !----------------------------------------------------------------------- !EOC end subroutine ice_distributionGetBlockLoc !*********************************************************************** !BOP ! !IROUTINE: ice_distributionGetBlockID ! !INTERFACE: subroutine ice_distributionGetBlockID(distribution, localID, & 13,1 blockID) ! !DESCRIPTION: ! Given a distribution of blocks and a local block index, return ! the global block id for the block. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: type (distrb), intent(in) :: & distribution ! input distribution for which information ! is requested integer (int_kind), intent(in) :: & localID ! local index for this block on this proc ! !OUTPUT PARAMETERS: integer (int_kind), intent(out) :: & blockID ! global block id for this local block !EOP !BOC !----------------------------------------------------------------------- ! ! check for valid localID ! !----------------------------------------------------------------------- if (localID < 0 .or. localID > distribution%numLocalBlocks) then call abort_ice( & 'ice_distributionGetBlockID: invalid local id') return endif !----------------------------------------------------------------------- ! ! extract the global ID from the distribution data structure ! !----------------------------------------------------------------------- blockID = distribution%blockGlobalID (localID) !----------------------------------------------------------------------- !EOC end subroutine ice_distributionGetBlockID !*********************************************************************** !BOP ! !IROUTINE: create_distrb_cart ! !INTERFACE: function create_distrb_cart(nprocs, workPerBlock) result(newDistrb) 6,4 ! !DESCRIPTION: ! This function creates a distribution of blocks across processors ! using a 2-d Cartesian distribution. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (int_kind), intent(in) :: & nprocs ! number of processors in this distribution integer (int_kind), dimension(:), intent(in) :: & workPerBlock ! amount of work per block ! !OUTPUT PARAMETERS: type (distrb) :: & newDistrb ! resulting structure describing Cartesian ! distribution of blocks !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (int_kind) :: & i, j, &! dummy loop indices istat, &! status flag for allocation iblock, jblock, &! is, ie, js, je, &! start, end block indices for each proc processor, &! processor position in cartesian decomp globalID, &! global block ID localID, &! block location on this processor nprocsX, &! num of procs in x for global domain nprocsY, &! num of procs in y for global domain numBlocksXPerProc, &! num of blocks per processor in x numBlocksYPerProc ! num of blocks per processor in y integer (int_kind), dimension(:), allocatable :: & proc_tmp ! temp processor id integer (int_kind) :: pid,n !---------------------------------------------------------------------- ! ! create communicator for this distribution ! !---------------------------------------------------------------------- call create_communicator(newDistrb%communicator, nprocs) !---------------------------------------------------------------------- ! ! try to find best processor arrangement ! !---------------------------------------------------------------------- newDistrb%nprocs = nprocs call proc_decomposition(nprocs, nprocsX, nprocsY) !---------------------------------------------------------------------- ! ! allocate space for decomposition ! !---------------------------------------------------------------------- allocate (newDistrb%blockLocation(nblocks_tot), & newDistrb%blockLocalID (nblocks_tot), stat=istat) allocate (newDistrb%blockCnt(nprocs)) !---------------------------------------------------------------------- ! ! distribute blocks linearly across processors in each direction ! !---------------------------------------------------------------------- numBlocksXPerProc = (nblocks_x-1)/nprocsX + 1 numBlocksYPerProc = (nblocks_y-1)/nprocsY + 1 do j=1,nprocsY do i=1,nprocsX processor = (j-1)*nprocsX + i ! number the processors ! left to right, bot to top is = (i-1)*numBlocksXPerProc + 1 ! starting block in i ie = i *numBlocksXPerProc ! ending block in i if (ie > nblocks_x) ie = nblocks_x js = (j-1)*numBlocksYPerProc + 1 ! starting block in j je = j *numBlocksYPerProc ! ending block in j if (je > nblocks_y) je = nblocks_y localID = 0 ! initialize counter for local index do jblock = js,je do iblock = is,ie globalID = (jblock - 1)*nblocks_x + iblock if (workPerBlock(globalID) /= 0) then localID = localID + 1 newDistrb%blockLocation(globalID) = processor newDistrb%blockLocalID (globalID) = localID else ! no work - eliminate block from distribution newDistrb%blockLocation(globalID) = 0 newDistrb%blockLocalID (globalID) = 0 endif end do end do ! if this is the local processor, set number of local blocks if (my_task == processor - 1) then newDistrb%numLocalBlocks = localID endif end do end do !---------------------------------------------------------------------- ! ! now store the local info ! !---------------------------------------------------------------------- if (newDistrb%numLocalBlocks > 0) then allocate (newDistrb%blockGlobalID(newDistrb%numLocalBlocks), & stat=istat) do j=1,nprocsY do i=1,nprocsX processor = (j-1)*nprocsX + i if (processor == my_task + 1) then is = (i-1)*numBlocksXPerProc + 1 ! starting block in i ie = i *numBlocksXPerProc ! ending block in i if (ie > nblocks_x) ie = nblocks_x js = (j-1)*numBlocksYPerProc + 1 ! starting block in j je = j *numBlocksYPerProc ! ending block in j if (je > nblocks_y) je = nblocks_y localID = 0 ! initialize counter for local index do jblock = js,je do iblock = is,ie globalID = (jblock - 1)*nblocks_x + iblock if (workPerBlock(globalID) /= 0) then localID = localID + 1 newDistrb%blockGlobalID (localID) = globalID endif end do end do endif end do end do endif allocate(proc_tmp(nprocs)) proc_tmp = 0 allocate(newDistrb%blockIndex(nprocs,max_blocks)) newDistrb%blockIndex(:,:) = 0 do n=1,nblocks_tot pid = newDistrb%blockLocation(n) if(pid>0) then proc_tmp(pid) = proc_tmp(pid) + 1 if(proc_tmp(pid) <= max_blocks) then newDistrb%blockIndex(pid,proc_tmp(pid)) = n endif endif enddo newDistrb%blockCnt(:) = proc_tmp(:) deallocate(proc_tmp) !---------------------------------------------------------------------- !EOC end function create_distrb_cart !********************************************************************** !BOP ! !IROUTINE: create_distrb_rake ! !INTERFACE: function create_distrb_rake(nprocs, workPerBlock) result(newDistrb) 1,9 ! !DESCRIPTION: ! This function distributes blocks across processors in a ! load-balanced manner based on the amount of work per block. ! A rake algorithm is used in which the blocks are first distributed ! in a Cartesian distribution and then a rake is applied in each ! Cartesian direction. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (int_kind), intent(in) :: & nprocs ! number of processors in this distribution integer (int_kind), dimension(:), intent(in) :: & workPerBlock ! amount of work per block ! !OUTPUT PARAMETERS: type (distrb) :: & newDistrb ! resulting structure describing ! load-balanced distribution of blocks !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (int_kind) :: & i,j,n ,&! dummy loop indices pid ,&! dummy for processor id istat ,&! status flag for allocates localBlock ,&! local block position on processor numOcnBlocks ,&! number of ocean blocks maxWork ,&! max amount of work in any block nprocsX ,&! num of procs in x for global domain nprocsY ! num of procs in y for global domain integer (int_kind), dimension(:), allocatable :: & priority ,&! priority for moving blocks workTmp ,&! work per row or column for rake algrthm procTmp ! temp processor id for rake algrthm type (distrb) :: dist ! temp hold distribution !---------------------------------------------------------------------- ! ! first set up as Cartesian distribution ! !---------------------------------------------------------------------- dist = create_distrb_cart(nprocs, workPerBlock) !---------------------------------------------------------------------- ! ! if the number of blocks is close to the number of processors, ! only do a 1-d rake on the entire distribution ! !---------------------------------------------------------------------- numOcnBlocks = count(workPerBlock /= 0) if (numOcnBlocks <= 2*nprocs) then allocate(priority(nblocks_tot), stat=istat) !*** initialize priority array do j=1,nblocks_y do i=1,nblocks_x n=(j-1)*nblocks_x + i if (workPerBlock(n) > 0) then priority(n) = maxWork + n - workPerBlock(n) else priority(n) = 0 endif end do end do allocate(workTmp(nblocks_tot), procTmp(nblocks_tot), stat=istat) workTmp(:) = 0 do i=1,nprocs procTmp(i) = i do n=1,nblocks_tot if (dist%blockLocation(n) == i) then workTmp(i) = workTmp(i) + workPerBlock(n) endif end do end do call ice_distributionRake (workTmp, procTmp, workPerBlock, & priority, dist) deallocate(workTmp, procTmp, stat=istat) !---------------------------------------------------------------------- ! ! otherwise re-distribute blocks using a rake in each direction ! !---------------------------------------------------------------------- else maxWork = maxval(workPerBlock) call proc_decomposition(dist%nprocs, nprocsX, nprocsY) !---------------------------------------------------------------------- ! ! load-balance using a rake algorithm in the x-direction first ! !---------------------------------------------------------------------- allocate(priority(nblocks_tot), stat=istat) !*** set highest priority such that eastern-most blocks !*** and blocks with the least amount of work are !*** moved first do j=1,nblocks_y do i=1,nblocks_x n=(j-1)*nblocks_x + i if (workPerBlock(n) > 0) then priority(n) = (maxWork + 1)*(nblocks_x + i) - & workPerBlock(n) else priority(n) = 0 endif end do end do allocate(workTmp(nprocsX), procTmp(nprocsX), stat=istat) do j=1,nprocsY workTmp(:) = 0 do i=1,nprocsX pid = (j-1)*nprocsX + i procTmp(i) = pid do n=1,nblocks_tot if (dist%blockLocation(n) == pid) then workTmp(i) = workTmp(i) + workPerBlock(n) endif end do end do call ice_distributionRake (workTmp, procTmp, workPerBlock, & priority, dist) end do deallocate(workTmp, procTmp, stat=istat) !---------------------------------------------------------------------- ! ! use a rake algorithm in the y-direction now ! !---------------------------------------------------------------------- !*** set highest priority for northern-most blocks do j=1,nblocks_y do i=1,nblocks_x n=(j-1)*nblocks_x + i if (workPerBlock(n) > 0) then priority(n) = (maxWork + 1)*(nblocks_y + j) - & workPerBlock(n) else priority(n) = 0 endif end do end do allocate(workTmp(nprocsY), procTmp(nprocsY), stat=istat) do i=1,nprocsX workTmp(:) = 0 do j=1,nprocsY pid = (j-1)*nprocsX + i procTmp(j) = pid do n=1,nblocks_tot if (dist%blockLocation(n) == pid) then workTmp(j) = workTmp(j) + workPerBlock(n) endif end do end do call ice_distributionRake (workTmp, procTmp, workPerBlock, & priority, dist) end do deallocate(workTmp, procTmp, priority, stat=istat) endif ! 1d or 2d rake !---------------------------------------------------------------------- ! ! create new distribution with info extracted from the temporary ! distribution ! !---------------------------------------------------------------------- newDistrb%nprocs = nprocs newDistrb%communicator = dist%communicator allocate(newDistrb%blockLocation(nblocks_tot), & newDistrb%blockLocalID(nblocks_tot), stat=istat) allocate (newDistrb%blockCnt(nprocs)) allocate(procTmp(nprocs), stat=istat) allocate(newDistrb%blockIndex(nprocs,max_blocks)) newDistrb%blockIndex(:,:) = 0 procTmp = 0 do n=1,nblocks_tot pid = dist%blockLocation(n) ! processor id newDistrb%blockLocation(n) = pid if (pid > 0) then procTmp(pid) = procTmp(pid) + 1 newDistrb%blockLocalID (n) = procTmp(pid) if(procTmp(pid) <= max_blocks) then newDistrb%blockIndex(pid,procTmp(pid)) = n endif else newDistrb%blockLocalID (n) = 0 endif end do newDistrb%numLocalBlocks = procTmp(my_task+1) if (minval(procTmp) < 1) then call abort_ice( & 'create_distrb_rake: processors left with no blocks') return endif newDistrb%blockCnt(:) = procTmp(:) deallocate(procTmp, stat=istat) if (istat > 0) then call abort_ice( & 'create_distrb_rake: error allocating last procTmp') return endif allocate(newDistrb%blockGlobalID(newDistrb%numLocalBlocks), & stat=istat) if (istat > 0) then call abort_ice( & 'create_distrb_rake: error allocating blockGlobalID') return endif localBlock = 0 do n=1,nblocks_tot if (newDistrb%blockLocation(n) == my_task+1) then localBlock = localBlock + 1 newDistrb%blockGlobalID(localBlock) = n endif end do !---------------------------------------------------------------------- call ice_distributionDestroy(dist) !---------------------------------------------------------------------- !EOC end function create_distrb_rake !********************************************************************** !BOP ! !IROUTINE: create_distrb_spacecurve ! !INTERFACE: function create_distrb_spacecurve(nprocs, maxBlock, work_per_block,prob_per_block,blockType, bStats, FixMaxBlock ) 2,35 ! !Description: ! This function distributes blocks across processors in a ! load-balanced manner using space-filling curves ! ! !REVISION HISTORY: ! added by J. Dennis 3/10/06 ! !INPUT PARAMETERS: integer (int_kind), intent(in) :: & nprocs ! number of processors in this distribution integer (int_kind), intent(in) :: maxBlock integer (int_kind), dimension(:), intent(in) :: & work_per_block ! amount of work per block real(dbl_kind), dimension(:), intent(in) :: & prob_per_block ! probability sea-ice within block integer (int_kind), dimension(:), intent(in) :: & blockType ! type of block real (dbl_kind), dimension(:,:), intent(in) :: bStats logical, intent(in) :: FixMaxBlock ! !OUTPUT PARAMETERS: type (distrb) :: & create_distrb_spacecurve ! resulting structure describing ! load-balanced distribution of blocks !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (int_kind) :: & i,j,k,n ,&! dummy loop indices pid ,&! dummy for processor id localID ,&! local block position on processor max_work ,&! max amount of work in any block nprocs_x ,&! num of procs in x for global domain nprocs_y ! num of procs in y for global domain character(char_len) :: fname integer (int_kind) :: maxB integer (int_kind), dimension(:),allocatable :: & idxT_i,idxT_j,Lindx,Lindx2 ! Temporary indices for SFC integer (int_kind), dimension(:,:),allocatable :: & Mesh ,&! !arrays to hold Space-filling curve Mesh2 ! integer (int_kind) :: & nblocksL,nblocks, &! Number of blocks local and total ii,extra,tmp1, &! loop tempories used for s1,ig ! partitioning curve logical, parameter :: Debug = .FALSE. integer (int_kind), dimension(:), allocatable :: & priority ,&! priority for moving blocks work_tmp ,&! work per row or column for rake algrthm proc_tmp ,&! temp processor id for rake algrthm block_count ! counter to determine local block indx integer (int_kind), allocatable, dimension(:) :: & blockLocation, &! block to processor mapping distance, &! location in uncompressed SFC type_on_curve, &! type of blocks work_on_curve type (distrb) :: dist ! temp hold distribution integer (int_kind) :: numIce, minblocks type (factor_t) :: xdim, ydim integer (int_kind) :: it,jj,i2,j2 integer (int_kind) :: curveSize, sb_x, sb_y, itmp,numfac integer (int_kind) :: subNum,sfcNum logical (log_kind) :: foundX integer (int_kind) :: ns,ntmp integer (int_kind) :: numLocalBlocks real (dbl_kind), allocatable, dimension(:) :: & Cost_per_proc , & Cost_per_block real (dbl_kind), allocatable, dimension(:,:) :: cStats ! block statistics on SFC curve integer (int_kind), allocatable, dimension(:) :: work_per_proc,work_per_block2 integer (int_kind) :: ierr ! error return code character(len=char_len) :: partitioning_type logical, parameter :: verbose = .FALSE. !------------------------------------------------------ ! Space filling curves only work if: ! ! nblocks_x = nblocks_y ! nblocks_x = 2^m 3^n 5^p where m,n,p are integers !------------------------------------------------------ if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #1: nblocks_x,nblocks_y: ',nblocks_x,nblocks_y if((.not. IsFactorable(nblocks_y)) .or. (.not. IsFactorable(nblocks_x))) then if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #1.1' create_distrb_spacecurve = create_distrb_cart(nprocs, work_per_block) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #1.2' return endif if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #1.3' !----------------------------------------------- ! Factor the numbers of blocks in each dimension !----------------------------------------------- xdim = Factor(nblocks_x) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #1.4' ydim = Factor(nblocks_y) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #1.5' numfac = xdim%numfact if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #2' !--------------------------------------------- ! Match the common factors to create SFC curve !--------------------------------------------- curveSize=1 do it=1,numfac call MatchFactor(xdim,ydim,itmp,foundX) curveSize = itmp*curveSize enddo if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #3' !-------------------------------------- ! determine the size of the sub-blocks ! within the space-filling curve !-------------------------------------- sb_x = ProdFactor(xdim) sb_y = ProdFactor(ydim) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #4' call create_communicator(dist%communicator, nprocs) dist%nprocs = nprocs !---------------------------------------------------------------------- ! ! allocate space for decomposition ! !---------------------------------------------------------------------- if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #5' allocate(blockLocation(nblocks_tot),type_on_curve(nblocks_tot)) allocate(distance(nblocks_tot)) allocate(work_on_curve(nblocks_tot)) allocate (dist%blockLocation(nblocks_tot), & dist%blockLocalID (nblocks_tot)) allocate (dist%blockCnt(nprocs)) blockLocation = 0 dist%blockLocation=0 dist%blockLocalID =0 if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #6' !---------------------------------------------------------------------- ! Create the array to hold the SFC and indices into it !---------------------------------------------------------------------- allocate(Mesh(curveSize,curveSize)) allocate(Mesh2(nblocks_x,nblocks_y)) allocate(idxT_i(nblocks_tot),idxT_j(nblocks_tot),Lindx(nblocks_tot)) allocate(Lindx2(nblocks_tot)) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #7' Mesh = 0 Mesh2 = 0 !---------------------------------------------------------------------- ! Cost function estimation... this is a potential replace for work_per_block !---------------------------------------------------------------------- allocate(Cost_per_proc(nblocks_tot)) allocate(Cost_per_block(nblocks_tot)) allocate(work_per_proc(nprocs)) allocate(work_per_block2(nblocks_tot)) call EstimateCost(bStats,nblocks_tot,Cost_per_block) blockLocation=0 do i=1,nblocks_tot work_per_block2(i) = NINT(10.0*ABS(Cost_per_block(i)),kind=int_kind) enddo if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #8' !---------------------------------------------------------------------- ! Generate the space-filling curve !---------------------------------------------------------------------- call GenSpaceCurve(Mesh) Mesh = Mesh + 1 ! make it 1-based indexing if(Debug) then if(my_task ==0) call PrintCurve(Mesh) endif !----------------------------------------------- ! Reindex the SFC to address internal sub-blocks !----------------------------------------------- do j=1,curveSize do i=1,curveSize sfcNum = (Mesh(i,j) - 1)*(sb_x*sb_y) + 1 do jj=1,sb_y do ii=1,sb_x subNum = (jj-1)*sb_x + (ii-1) i2 = (i-1)*sb_x + ii j2 = (j-1)*sb_y + jj Mesh2(i2,j2) = sfcNum + subNum enddo enddo enddo enddo if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #9' !------------------------------------------------ ! create a linear array of i,j coordinates of SFC !------------------------------------------------ idxT_i=0;idxT_j=0;Lindx=0;Lindx2=0 do j=1,nblocks_y do i=1,nblocks_x n = (j-1)*nblocks_x + i ig = Mesh2(i,j) if(work_per_block(n) /= 0) then idxT_i(ig)=i;idxT_j(ig)=j endif Lindx(n) = ig enddo enddo do i=1,nblocks_tot type_on_curve(Lindx(i)) = blockType(i) enddo if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #10' ! ------------------------------ ! compress out the land blocks ! ------------------------------ ii=0 do i=1,nblocks_tot if(IdxT_i(i) .gt. 0) then ii=ii+1 ! Mesh3(idxT_i(i),idxT_j(i)) = ii n = (idxT_j(i)-1)*nblocks_x + idxT_i(i) Lindx2(n) = ii endif enddo nblocks=ii !DBG if(my_task == 0) then !DBG print *,'work_per_block2:',work_per_block2 !DBG endif if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #11' allocate(cStats(numCoeff,nblocks)) do i=1,nblocks_tot if(Lindx2(i)>0) then work_on_curve(Lindx2(i)) = work_per_block2(i) type_on_curve(Lindx2(i)) = blockType(i) cStats(:,Lindx2(i)) = bStats(:,i) distance(Lindx2(i)) = Lindx(i) endif enddo if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #12' if(lprint_stats) then fname = 'WorkPerBlock.bin' call WriteIntegerArray(fname,nblocks,work_on_curve) !DBG print *,'work_per_block2:',work_per_block2 !DBG print *,'work_on_curve:', work_on_curve(1:nblocks) ! open(nu_timing,file='WorkPerBlock.bin',recl=4*nblocks, & ! form = 'unformatted', access='direct',status='unknown') ! write(nu_timing,rec=1) work_on_curve(1:nblocks) ! close(nu_timing) endif if(lprint_stats) then call WriteProbabilityStats(bStats,nblocks_tot) endif if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #13' maxB=MIN(max_blocks,maxBlock) partitioning_type = 'weight' select case(partitioning_type) case ('type') ! KLUDGE this is just for testing need to come up with a general solution numIce = COUNT(blockType .eq. iceType) minblocks = CEILING(REAL(numIce)/REAL(nprocs),kind=int_kind) ! print *,'before TypePartition: {minblocks,maxblocks}: ',minblocks,maxB call TypePartition(type_on_curve,minblocks,maxB,blockLocation) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #14' if(MAXVAL(blockLocation) > nprocs) then print *,'ERROR: problem with partitioning: insufficient processors' endif ! re-index blockLocation from curve to physical ordering do i=1,nblocks_tot dist%blockLocation(i) = blockLocation(Lindx(i)) enddo if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #15' case ('weight') if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #16' call PartitionCurve(work_on_curve(1:nblocks),work_per_proc, & blockLocation(1:nblocks),distance(1:nblocks), nprocs,maxB,cStats,FixMaxBlock, ierr) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #17' !DBG print *,'After PartitionCurve:' if(ierr < 0) then call abort_ice('create_distrb_spacecurve: PartitionCurve failed') endif !DBG print *,'before broadcast_array:' call broadcast_array(blockLocation,master_task) !DBG print *,'After broadcast_array' ! re-index blockLocation from curve to physical ordering numLocalBlocks=0 do i=1,nblocks_tot if(Lindx2(i)>0) then dist%blockLocation(i) = blockLocation(Lindx2(i)) numLocalBlocks=numLocalBlocks+1 endif enddo end select ! call qsort(dist%blockLocation(1:numLocalBlocks)) ! print *,'IAM: ',my_task,'create_distrb_spacecurve: dist%blockLocation(:)', dist%blockLocation(1:nblocks_tot) ! print *,'IAM: ',my_task,'create_distrb_spacecurve: dist%blockLocation(1:numLocalBlocks)', dist%blockLocation(1:numLocalBlocks) ! call ConvertStatsBlock2Proc(dist%blockLocation,bStats,cStats) !DBG print *,'Before call to BuildProbabilityStats2' call BuildProbabilityStats2(dist%blockLocation,cStats) !DBG print *,'After call to BuildProbabilityStats2' if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #19' call EstimateCost(cStats,nprocs,Cost_per_proc) !DBG print *,'before WriteProbabilityStats' !DBG print *,'after WriteProbabilityStats' if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #20' if(lprint_stats) then fname = 'Q.bin' call WriteIntegerArray(fname,nblocks_tot,dist%blockLocation) fname = 'Cost.bin' call WriteDblArray(fname,nprocs,Cost_per_proc) fname = 'perfm.bin' call WriteDblArray(fname,numCoeff,perfmodel) endif if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #21' !---------------------------------------------------------------------- ! Reset the dist data structure !---------------------------------------------------------------------- allocate(proc_tmp(nprocs)) proc_tmp = 0 allocate(dist%blockIndex(nprocs,max_blocks)) dist%blockIndex(:,:) = 0 do n=1,nblocks_tot pid = dist%blockLocation(n) if(pid>0) then proc_tmp(pid) = proc_tmp(pid) + 1 dist%blockLocalID(n) = proc_tmp(pid) if(proc_tmp(pid) <= max_blocks) then dist%blockIndex(pid,proc_tmp(pid)) = n endif endif enddo dist%blockCnt(:) = proc_tmp(:) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #22' !--------------------------------------- ! Set the number of active local blocks !--------------------------------------- dist%numLocalBlocks = proc_tmp(my_task+1) if (dist%numLocalBlocks>0) then allocate(dist%blockGlobalID(dist%numLocalBlocks)) localID=1 do n=1,nblocks_tot pid = dist%blockLocation(n) if(pid == my_task+1) then dist%blockGlobalID(localID) = n localID=localID+1 endif enddo endif if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #23' if(Debug) then if(my_task==0) print *,'dist%blockLocation:= ',dist%blockLocation print *,'IAM: ',my_task,' SpaceCurve: Number of blocks {total,local} :=', & nblocks_tot,nblocks,proc_tmp(my_task+1) endif !DBG print *,'create_distrb_spacecurve: before deallocate block' !--------------------------------- ! Deallocate temporary arrays !--------------------------------- !DBG print *,'create_distrb_spacecurve: before deallocate(blockLocation)' deallocate(blockLocation) !DBG print *,'create_distrb_spacecurve: before deallocate(type_on_curve)' deallocate(type_on_curve) !DBG print *,'create_distrb_spacecurve: before deallocate(work_on_curve)' deallocate(work_on_curve) !DBG print *,'create_distrb_spacecurve: before deallocate(proc_tmp)' deallocate(proc_tmp) !DBG print *,'create_distrb_spacecurve: before deallocate(Mesh,Mesh2)' if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #24' deallocate(Mesh,Mesh2) !DBG print *,'create_distrb_spacecurve: before deallocate(idxT_i,idxT_j,Lindx,Lindx2)' deallocate(idxT_i,idxT_j,Lindx,Lindx2) !DBG print *,'create_distrb_spacecurve: before deallocate(Cost_per_proc)' deallocate(Cost_per_proc) !DBG print *,'create_distrb_spacecurve: before deallocate(Cost_per_block)' deallocate(Cost_per_block) !DBG print *,'create_distrb_spacecurve: before deallocate(work_per_proc)' deallocate(work_per_proc) !DBG print *,'create_distrb_spacecurve: before deallocate(work_per_block2)' deallocate(work_per_block2) if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #25' if(verbose .and. my_task == 0) then print *,'create_distrb_spacecurve: blockCnt ',dist%blockCnt print *,'create_distrb_spacecurve: blockIndex ',dist%blockIndex endif !DBG print *,'create_distrb_spacecurve: before assignment of result' !---------------------------------------------------------------------- create_distrb_spacecurve = dist ! return the result !---------------------------------------------------------------------- if (verbose .and. my_task==0) then print *,'create_distrb_spacecurve%blockGlobalID: ',create_distrb_spacecurve%blockGlobalID print *,'create_distrb_spacecurve%blockCnt: ',create_distrb_spacecurve%blockCnt endif !DBG print *,'At the end of create_distrb_spacecurve' if(verbose) print *,'IAM: ',my_task,'create_distrb_spacecurve: point #26' !EOC end function create_distrb_spacecurve subroutine TypePartition(blockType,minblocks,maxblocks,blockLocation) 1 integer(kind=int_kind), intent(in) :: blockType(nblocks_tot) integer, intent(in) :: minblocks, & ! Minimum number of blocks per processor maxblocks ! Maximum number of blocks per processor integer(kind=int_kind), intent(inout) :: blockLocation(nblocks_tot) integer :: i,ip,cur,next integer :: cntLnd,tcntLnd, & cntIcefree,tcntIcefree, & cntIce,tcntIce tcntIce = 0 tcntLnd = 0 tcntIcefree = 0 cntIce = 0 cntLnd = 0 cntIcefree = 0 ip = 1 do i=1,nblocks_tot cur = blockType(i) if(i<nblocks_tot) then next = blockType(i+1) else next = cur endif !------------ ! Land point !------------ if(cur == lndType) then tcntLnd = tcntLnd + 1 endif !---------------------- ! ice Free point !---------------------- if (cur == icefreeType) then tcntIcefree = tcntIcefree + 1 cntIcefree = cntIcefree + 1 blockLocation(i) = ip if ((cntIcefree == maxblocks) .or. (next /= cur)) then ip = ip + 1 cntIcefree=0 endif endif !---------------------- ! ice Free point !---------------------- if (cur == iceType) then tcntIce = tcntIce + 1 cntIce = cntIce + 1 blockLocation(i) = ip if ((cntIce == minblocks) .or. (next /= cur)) then ip = ip + 1 cntIce=0 endif endif enddo if(my_task == 0) then write(*,23) tcntLnd+tcntIce+tcntIcefree, tcntIce, tcntIcefree, tcntLnd ! print *,'TypePartition: land blks: ',tcntLnd,' Ice blks: ', & ! tcntIce,' IceFree blks: ',tcntIcefree write(*,24) MAXVAL(blockLocation) ! print *,'TypePartition: Partitioned across ',MAXVAL(blockLocation),' processors' endif 23 format('Total blocks: ',i5,' Ice blocks: ',i5,' IceFree blocks: ',i5,' Land blocks: ',i5) 24 format('Partitioned across ',i5,' processors') end subroutine TypePartition subroutine PartitionCurve(work_per_block, work_per_proc, blockLocation, distance, & 1,5 nproc, max_blocks, Stats, FixMaxBlock, ierr) integer (int_kind), intent(inout) :: work_per_block(:) integer (int_kind), intent(inout) :: work_per_proc(:) integer (int_kind), intent(inout) :: blockLocation(:) integer (int_kind), intent(inout) :: distance(:) integer (int_kind), intent(in) :: nproc integer (int_kind), intent(in) :: max_blocks real (dbl_kind), intent(in) :: Stats(:,:) logical, intent(in) :: FixMaxBlock integer (int_kind), intent(inout) :: ierr integer :: cnt integer :: nb,anProc integer :: n,ip,i,imax real :: totalCost, avgCost, maxCost,dtCost integer, allocatable :: saveblockLocation(:) integer :: maxBlocks,minBlocks integer :: ivalue real (real_kind) :: maxValue real :: maxValue_save real :: minValue logical :: contLoop integer :: maxB,minB logical, parameter :: verbose = .FALSE. integer :: it,maxiter integer :: save_maxB real :: save_maxCost integer :: amaxBlocks real :: aWork,minWork,maxWork,maxWorkBlock real :: minCostBlock, maxCostBlock real :: maxCost_old real (real_kind) :: maxDil,amaxDil real (dbl_kind), allocatable, dimension(:) :: cost_per_proc real (dbl_kind), allocatable, dimension(:) :: cost_per_block real (dbl_kind), allocatable, dimension(:,:) :: pStats allocate(pStats(numCoeff,nblocks_tot)) maxB=max_blocks maxDil = 5.0 if ( my_task .eq. master_task) then nb = SIZE(blockLocation) ! number of blocks allocate(saveblockLocation(nb)) allocate(cost_per_proc(nb)) allocate(cost_per_block(nb)) ! -------------------------------------------- ! Estimate the computational cost of each block ! -------------------------------------------- call EstimateCost(Stats,nb,cost_per_block) totalCost = SUM(cost_per_block) save_maxCost = totalCost maxCostBlock = MAXVAL(cost_per_block) minCostBlock = MINVAL(cost_per_block) save_maxB = maxB avgCost = (totalCost/nproc) print *,'PartitionCurve: nblocks,nproc ',nb,nproc write(nu_diag,213) totalCost, avgCost, minCostBlock, maxCostBlock !DBG print *,'PartitionCurve: totalCost,avgCost, maxCostBlock: ',totalCost,avgCost,maxCostBlock !DBG write(nu_diag,*) distance minB = CEILING(real(nb)/real(nproc),kind=int_kind) if(maxB < minB ) then print *,'ERROR: unable to partition ',nb,' blocks across ',nproc,' processors' print *,'ERROR: Either increase max_blocks := ',maxB print *,'ERROR: Either increase number of processors' ierr = -2 return endif if(FixMaxBlock) then minB = maxB endif do while (maxB >= minB ) dtCost = 1.0 maxValue = maxCostBlock*real(maxB) minValue = maxCostBlock maxCost_old = maxValue contLoop = .true. maxiter = 20 it = 1 do while(contLoop ) cost_per_proc=0.0 call wPartition(cost_per_block,blockLocation, distance, nproc,maxB,maxValue,maxDil,amaxBlocks, amaxDil) anProc = MAXVAL(blockLocation) call ConvertStatsBlock2Proc(blockLocation,Stats,pStats) call EstimateCost(pStats,anProc,cost_per_proc) maxCost = MAXVAL(cost_per_proc) if(lprint_stats) then write(nu_diag,211) it,anProc,amaxBlocks,maxB,minValue, maxValue,maxCost, amaxDil endif if(maxCost > maxValue) then minValue = maxValue dtCost = (maxCost_old-minValue)/2.0 maxValue = maxCost_old - dtCost else dtCost = (maxCost-minValue)/2.0 maxValue = maxCost - dtCost maxCost_old = maxCost endif if(maxCost == maxCostBlock) contLoop = .false. if(dtCost < 1.0e-5) contLoop = .false. if( anProc == nproc .and. it >= maxiter) contLoop = .false. if ((save_maxCost > maxCost .and. amaxBlocks <= maxB) .or. & (save_maxCost == maxCost .and. save_maxB > maxB) ) then save_maxCost = maxCost save_maxB = maxB saveblockLocation = blockLocation endif it=it+1 enddo maxB = maxB - 1 enddo blockLocation = saveblockLocation write(nu_diag,214) perfmodel_name print *,'-------------------------wSFC-----------------------' call PrintPartitionLB(blockLocation,nproc,Stats) 211 format('Partition loop: it: ',i4,' anProc: ',i4,' amaxBlocks ', & i4,' maxB: ',i4,' [min,max]Value: ',2(f14.4),' maxCost: ',f14.4,' maxDilation: ',f14.4 ) 213 format('PartitionCurve: TotalCost: ',f12.4,' avgCost: ',f12.4, & ' minBlockCost: ',f12.4,' maxBlockCost: ',f12.4 ) 214 format('Using performance model: ',a20) deallocate(cost_per_proc) deallocate(cost_per_block) deallocate(saveblockLocation) endif deallocate(pStats) ierr = 0 end subroutine PartitionCurve subroutine wPartition(cost_per_block, blockLocation, distance, nproc, max_blocks, maxValue, maxDil, amaxBlocks,amaxDil) 1 real (dbl_kind), intent(in) :: cost_per_block(:) integer (int_kind), intent(inout) :: blockLocation(:) integer (int_kind), intent(inout) :: distance(:) integer (int_kind), intent(in) :: nproc integer (int_kind), intent(in) :: max_blocks real (real_kind), intent(in) :: maxvalue real (real_kind), intent(in) :: maxDil ! maximum alloable dilation of domains integer (int_kind), intent(inout) :: amaxBlocks real (real_kind), intent(out) :: amaxDil integer (int_kind) :: n,ip,i,numB real :: totalCost,avgCost,sumTMP,sumTMP2 ! integer (int_kind), allocatable :: work_per_proc(:) integer :: minDist,Dist integer :: sloc real :: dilation, dilation2 logical, parameter :: Info = .FALSE. logical :: break_loop n = SIZE(blockLocation) ! number of blocks totalCost = SUM(cost_per_block) amaxBlocks = 0 amaxDil = 1.0 avgCost = (totalCost/nproc) !DBG print *,'cost_per_block: ',cost_per_block ip = 1 i=1 ! cost_per_block=0 do while (i<=n) sumTMP = 0 numB = 0 break_loop = .FALSE. sloc = distance(i) do while ( ((sumTMP<maxValue) .or. (ip == nproc)) & .and. (i<=n) & .and. (.not. break_loop)) sumTMP2 = sumTMP + cost_per_block(i) minDist = numB + 1 Dist = distance(i) - sloc + 1 dilation2 = real(Dist)/real(minDist) if(((sumTMP2 <= maxValue) .and. (numB < max_blocks) .and. dilation2 <= maxDil) & ! if(((sumTMP2 <= maxValue) .and. (numB < max_blocks)) & .or. (ip == nproc) ) then blockLocation(i) = ip i=i+1 sumTMP = sumTMP2 dilation = dilation2 numB = numB+1 else break_loop = .TRUE. endif enddo if(amaxBlocks < numB) amaxBlocks = numB if(amaxDil < dilation) amaxDil = dilation ip = ip+1 enddo ! call abort_ice('wPartition: at the end of the subroutine') end subroutine wPartition !********************************************************************** !BOP ! !IROUTINE: ice_distributionRake ! !INTERFACE: subroutine ice_distributionRake (procWork, procID, blockWork, & 3 priority, distribution) ! !DESCRIPTION: ! This subroutine performs a rake algorithm to distribute the work ! along a vector of processors. In the rake algorithm, a work ! threshold is first set. Then, moving from left to right, work ! above that threshold is raked to the next processor in line. ! The process continues until the end of the vector is reached ! and then the threshold is reduced by one for a second rake pass. ! In this implementation, a priority for moving blocks is defined ! such that the rake algorithm chooses the highest priority ! block to be moved to the next processor. This can be used ! for example to always choose the eastern-most block or to ! ensure a block does not stray too far from its neighbors. ! ! !REVISION HISTORY: ! same as module ! !INPUT PARAMETERS: integer (int_kind), intent(in), dimension(:) :: & blockWork ,&! amount of work per block procID ! global processor number ! !INPUT/OUTPUT PARAMETERS: integer (int_kind), intent(inout), dimension(:) :: & procWork ,&! amount of work per processor priority ! priority for moving a given block type (distrb), intent(inout) :: & distribution ! distribution to change ! !OUTPUT PARAMETERS: !EOP !BOC !---------------------------------------------------------------------- ! ! local variables ! !---------------------------------------------------------------------- integer (int_kind) :: & i, n, &! dummy loop indices np1, &! n+1 corrected for cyclical wrap iproc, inext, &! processor ids for current and next nprocs, numBlocks, &! number of blocks, processors lastPriority, &! priority for most recent block minPriority, &! minimum priority lastLoc, &! location for most recent block meanWork, maxWork, &! mean,max work per processor diffWork, residual, &! work differences and residual work numTransfers ! counter for number of block transfers !---------------------------------------------------------------------- ! ! initialization ! !---------------------------------------------------------------------- nprocs = size(procWork) numBlocks = size(blockWork) !*** compute mean,max work per processor meanWork = sum(procWork)/nprocs + 1 maxWork = maxval(procWork) residual = mod(meanWork,nprocs) minPriority = 1000000 do n=1,nprocs iproc = procID(n) do i=1,numBlocks if (distribution%blockLocation(i) == iproc) then minPriority = min(minPriority,priority(i)) endif end do end do !---------------------------------------------------------------------- ! ! do two sets of transfers ! !---------------------------------------------------------------------- transferLoop: do !---------------------------------------------------------------------- ! ! do rake across the processors ! !---------------------------------------------------------------------- numTransfers = 0 do n=1,nprocs if (n < nprocs) then np1 = n+1 else np1 = 1 endif iproc = procID(n) inext = procID(np1) if (procWork(n) > meanWork) then !*** pass work to next diffWork = procWork(n) - meanWork rake1: do while (diffWork > 1) !*** attempt to find a block with the required !*** amount of work and with the highest priority !*** for moving (eg boundary blocks first) lastPriority = 0 lastLoc = 0 do i=1,numBlocks if (distribution%blockLocation(i) == iproc) then if (priority(i) > lastPriority ) then lastPriority = priority(i) lastLoc = i endif endif end do if (lastLoc == 0) exit rake1 ! could not shift work numTransfers = numTransfers + 1 distribution%blockLocation(lastLoc) = inext if (np1 == 1) priority(lastLoc) = minPriority diffWork = diffWork - blockWork(lastLoc) procWork(n ) = procWork(n )-blockWork(lastLoc) procWork(np1) = procWork(np1)+blockWork(lastLoc) end do rake1 endif end do !---------------------------------------------------------------------- ! ! increment meanWork by one and repeat ! !---------------------------------------------------------------------- meanWork = meanWork + 1 if (numTransfers == 0 .or. meanWork > maxWork) exit transferLoop end do transferLoop !---------------------------------------------------------------------- !EOC end subroutine ice_distributionRake !*********************************************************************** subroutine PrintPartitionLB(Location,n,bStats) 1,2 integer :: Location(:) integer :: n real (dbl_kind), dimension(:,:) :: bStats real :: maxCost,minCost integer :: anProc integer :: maxB real :: aCost real (dbl_kind), allocatable, dimension(:) :: cost_per_proc(:) integer :: i,ncnt real (dbl_kind), allocatable, dimension(:,:) :: pStats allocate(cost_per_proc(n)) allocate(pStats(numCoeff,n)) call ConvertStatsBlock2Proc(Location,bStats,pStats) call EstimateCost(pStats,n,cost_per_proc) !DBG print *,'PrintPartitinoLB: Location:',Location !DBG print *,'PrintPartitinoLB: cost_per_proc:',cost_per_proc maxCost = MAXVAL(cost_per_proc) minCost = MINVAL(cost_per_proc) aCost = SUM(cost_per_proc)/real(n) anProc=MAXVAL(Location) maxB=0; do i=1,n ncnt = COUNT(Location==i) maxB = MAX(ncnt,maxB) enddo !DBG print *,'maxCost: ',maxCost !DBG print *,'minCost: ',minCost !DBG print *,'aCost: ',aCost ! print *,'PrintPartitionLB: on ',anProc,' processors Avg,Min,Max work/proc, imbalance ', & ! aWork,minWork,maxWork,ABS(aWork-maxWork)/aWork write(nu_diag,212) anProc, maxB,aCost,minCost,maxCost,ABS(aCost-maxCost)/aCost deallocate(cost_per_proc) deallocate(pStats) 212 format('PrintPartitionLB: on ',i4,' procs maxB: ',i4,' Avg,Min,Max Cost/proc: ',(3(f10.4)),' imbalance: ',f8.2) end subroutine PrintPartitionLB subroutine EstimateCost(coeffMatrix,n,Cost) 5 real (dbl_kind) :: coeffMatrix(:,:) integer (int_kind) :: n real (dbl_kind):: Cost(:) real (dbl_kind) :: tmp integer (int_kind) :: i,j Cost=0.0_dbl_kind do i=1,n tmp = 0.0_dbl_kind do j=1,numCoeff tmp = tmp + coeffMatrix(j,i) *perfmodel(j) enddo Cost(i) = tmp enddo end subroutine EstimateCost subroutine ConvertStatsBlock2Proc(Location,bStats,pStats) 2 integer (int_kind) :: Location(:) real (dbl_kind), intent(in) :: bStats(:,:) real (dbl_kind), intent(out) :: pStats(:,:) integer (int_kind) :: i,ip,n n = size(Location) pStats = 0.0d0 do i=1,n ip = Location(i) if(ip > 0) then pStats(:,ip) = pStats(:,ip) + bStats(:,i) endif enddo end subroutine ConvertStatsBlock2Proc subroutine WriteProbabilityStats(coeffMatrix,n) 1 real(dbl_kind) :: coeffMatrix(:,:) integer (int_kind) :: n if(my_task == master_task) then open(nu_timing,file='probStats.bin',recl=8*numCoeff*n, & form = 'unformatted', access = 'direct', status = 'unknown') write(nu_timing,rec=1) coeffMatrix(:,1:n) close(nu_timing) endif end subroutine WriteProbabilityStats subroutine WriteIntegerArray(fname,n,array) 2 character(char_len) :: fname integer (int_kind) :: n integer (int_kind) :: array(:) if(my_task == master_task) then open(nu_timing,file=TRIM(fname),recl=4*n, & form = 'unformatted', access = 'direct', status = 'unknown') write(nu_timing,rec=1) array(1:n) close(nu_timing) endif end subroutine WriteIntegerArray subroutine WriteDblArray(fname,n,array) 2 character(char_len) :: fname real (dbl_kind) :: array(:) integer (int_kind) :: n if(my_task == master_task) then open(nu_timing,file=TRIM(fname),recl=8*n, & form = 'unformatted', access = 'direct', status = 'unknown') write(nu_timing,rec=1) array(1:n) close(nu_timing) endif end subroutine WriteDblArray end module ice_distribution !|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||