LCOV - code coverage report
Current view: top level - src - qs_fb_distribution_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b4bd748) Lines: 209 272 76.8 %
Date: 2025-03-09 07:56:22 Functions: 10 18 55.6 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : MODULE qs_fb_distribution_methods
       9             : 
      10             :    USE cell_types,                      ONLY: cell_type
      11             :    USE cp_dbcsr_api,                    ONLY: dbcsr_distribution_get,&
      12             :                                               dbcsr_distribution_type,&
      13             :                                               dbcsr_get_info,&
      14             :                                               dbcsr_p_type,&
      15             :                                               dbcsr_type
      16             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      17             :                                               cp_logger_type
      18             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      19             :                                               cp_print_key_unit_nr
      20             :    USE input_section_types,             ONLY: section_vals_type
      21             :    USE kinds,                           ONLY: dp
      22             :    USE message_passing,                 ONLY: mp_para_env_type
      23             :    USE particle_types,                  ONLY: particle_type
      24             :    USE qs_environment_types,            ONLY: get_qs_env,&
      25             :                                               qs_environment_type
      26             :    USE qs_fb_atomic_halo_types,         ONLY: &
      27             :         fb_atomic_halo_build_halo_atoms, fb_atomic_halo_cost, fb_atomic_halo_create, &
      28             :         fb_atomic_halo_init, fb_atomic_halo_nullify, fb_atomic_halo_obj, fb_atomic_halo_release, &
      29             :         fb_atomic_halo_set, fb_build_pair_radii
      30             :    USE qs_fb_env_types,                 ONLY: fb_env_get,&
      31             :                                               fb_env_obj,&
      32             :                                               fb_env_set
      33             :    USE qs_kind_types,                   ONLY: qs_kind_type
      34             :    USE util,                            ONLY: sort
      35             : #include "./base/base_uses.f90"
      36             : 
      37             :    IMPLICIT NONE
      38             : 
      39             :    PRIVATE
      40             : 
      41             :    PUBLIC :: fb_distribution_build
      42             : 
      43             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_distribution_methods'
      44             : 
      45             : ! **************************************************************************************************
      46             : !> \brief derived type containing cost data used for process distribution
      47             : !> \param id               : global atomic index
      48             : !> \param cost             : computational cost for the atomic matrix associated
      49             : !>                           to this atom
      50             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      51             : ! **************************************************************************************************
      52             :    TYPE fb_distribution_element
      53             :       INTEGER :: id = -1
      54             :       REAL(KIND=dp) :: cost = -1.0_dp
      55             :    END TYPE fb_distribution_element
      56             : 
      57             : ! **************************************************************************************************
      58             : !> \brief derived type containing the list of atoms currently allocated to a
      59             : !>        processor
      60             : !> \param list             : list of atoms and their associated costs
      61             : !> \param cost             : total cost of the list
      62             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      63             : ! **************************************************************************************************
      64             :    TYPE fb_distribution_list
      65             :       TYPE(fb_distribution_element), DIMENSION(:), POINTER :: list => NULL()
      66             :       INTEGER :: nelements = -1
      67             :       REAL(KIND=dp) :: cost = -1.0_dp
      68             :    END TYPE fb_distribution_list
      69             : 
      70             : ! **************************************************************************************************
      71             : !> \brief In filter matrix algorithm, each atomic matrix contributes to a
      72             : !>        column in the filter matrix, which is stored in DBCSR format.
      73             : !>        When distributing the atoms (and hence the atomic matrics) to the
      74             : !>        processors, we want the processors to have atoms that would be
      75             : !>        correspond to the block columns in the DBCSR format local to them.
      76             : !>        This derived type stores this information. For each atom, it
      77             : !>        corresponds to a DBCSR block column, and the list of processors
      78             : !>        in the 2D processor grid responsible for this column will be the
      79             : !>        preferred processors for this atom.
      80             : !> \param list             : list of preferred processors for an atom
      81             : !>                           note that here the processors are indexed from
      82             : !>                           1, i.e. = MPI_RANK+1
      83             : !> \param nprocs           : number of processors in the list
      84             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      85             : ! **************************************************************************************************
      86             :    TYPE fb_preferred_procs_list
      87             :       INTEGER, DIMENSION(:), POINTER :: list => NULL()
      88             :       INTEGER :: nprocs = -1
      89             :    END TYPE fb_preferred_procs_list
      90             : 
      91             : ! Parameters related to automatic resizing of the hash_table:
      92             : ! Resize by EXPAND_FACTOR if total no. slots / no. of filled slots < ENLARGE_RATIO
      93             :    INTEGER, PARAMETER, PRIVATE :: ENLARGE_RATIO = 1
      94             :    INTEGER, PARAMETER, PRIVATE :: REDUCE_RATIO = 3
      95             :    INTEGER, PARAMETER, PRIVATE :: EXPAND_FACTOR = 2
      96             :    INTEGER, PARAMETER, PRIVATE :: SHRINK_FACTOR = 2
      97             : 
      98             :    INTERFACE fb_distribution_remove
      99             :       MODULE PROCEDURE fb_distribution_remove_ind, &
     100             :          fb_distribution_remove_el
     101             :    END INTERFACE fb_distribution_remove
     102             : 
     103             :    INTERFACE fb_distribution_move
     104             :       MODULE PROCEDURE fb_distribution_move_ind, &
     105             :          fb_distribution_move_el
     106             :    END INTERFACE fb_distribution_move
     107             : 
     108             : CONTAINS
     109             : 
     110             : ! **************************************************************************************************
     111             : !> \brief Build local atoms associated to filter matrix algorithm for each
     112             : !>        MPI process, trying to balance the load for calculating the
     113             : !>        filter matrix
     114             : !> \param fb_env : the filter matrix environment
     115             : !> \param qs_env : quickstep environment
     116             : !> \param scf_section : SCF input section
     117             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     118             : ! **************************************************************************************************
     119          10 :    SUBROUTINE fb_distribution_build(fb_env, qs_env, scf_section)
     120             :       TYPE(fb_env_obj), INTENT(INOUT)                    :: fb_env
     121             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     122             :       TYPE(section_vals_type), POINTER                   :: scf_section
     123             : 
     124             :       CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_build'
     125             : 
     126             :       INTEGER :: handle, i_common_set, iatom, ii, ipe, lb, lowest_cost_ind, my_pe, n_common_sets, &
     127             :          natoms, nhalo_atoms, nkinds, nprocs, owner_id_in_halo, pref_pe, ub
     128          10 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: common_set_ids, local_atoms_all, &
     129          10 :                                                             local_atoms_sizes, local_atoms_starts, &
     130          10 :                                                             pe, pos_in_preferred_list
     131          10 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms, local_atoms
     132             :       LOGICAL                                            :: acceptable_move, move_happened
     133             :       REAL(KIND=dp)                                      :: average_cost
     134          10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cost_per_atom, cost_per_proc
     135             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: pair_radii
     136          10 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: rcut
     137             :       TYPE(cell_type), POINTER                           :: cell
     138          10 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_ks
     139             :       TYPE(fb_atomic_halo_obj)                           :: atomic_halo
     140             :       TYPE(fb_distribution_element)                      :: element
     141             :       TYPE(fb_distribution_list), ALLOCATABLE, &
     142          10 :          DIMENSION(:)                                    :: dist
     143             :       TYPE(fb_preferred_procs_list), ALLOCATABLE, &
     144          10 :          DIMENSION(:)                                    :: preferred_procs_set
     145             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     146          10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     147          10 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     148             : 
     149          10 :       CALL timeset(routineN, handle)
     150             : 
     151          10 :       NULLIFY (mat_ks, rcut, cell, para_env, particle_set, qs_kind_set, &
     152          10 :                halo_atoms, local_atoms)
     153          10 :       CALL fb_atomic_halo_nullify(atomic_halo)
     154             : 
     155             :       ! obtain relevant data from fb_env, qs_env
     156             :       CALL fb_env_get(fb_env=fb_env, &
     157          10 :                       rcut=rcut)
     158             :       CALL get_qs_env(qs_env=qs_env, &
     159             :                       natom=natoms, &
     160             :                       particle_set=particle_set, &
     161             :                       qs_kind_set=qs_kind_set, &
     162             :                       nkind=nkinds, &
     163             :                       cell=cell, &
     164             :                       para_env=para_env, &
     165          10 :                       matrix_ks=mat_ks)
     166          10 :       nprocs = para_env%num_pe
     167          10 :       my_pe = para_env%mepos + 1 ! counting from 1
     168             : 
     169             :       ! for each global atom, build atomic halo and get the associated cost
     170          40 :       ALLOCATE (pair_radii(nkinds, nkinds))
     171          10 :       CALL fb_build_pair_radii(rcut, nkinds, pair_radii)
     172          10 :       CALL fb_atomic_halo_create(atomic_halo)
     173          30 :       ALLOCATE (cost_per_atom(natoms))
     174          90 :       DO iatom = 1, natoms
     175          80 :          CALL fb_atomic_halo_init(atomic_halo)
     176             :          CALL fb_atomic_halo_build_halo_atoms(iatom, &
     177             :                                               particle_set, &
     178             :                                               cell, &
     179             :                                               pair_radii, &
     180             :                                               halo_atoms, &
     181             :                                               nhalo_atoms, &
     182          80 :                                               owner_id_in_halo)
     183             :          CALL fb_atomic_halo_set(atomic_halo=atomic_halo, &
     184             :                                  owner_atom=iatom, &
     185             :                                  natoms=nhalo_atoms, &
     186          80 :                                  halo_atoms=halo_atoms)
     187          80 :          NULLIFY (halo_atoms)
     188         170 :          cost_per_atom(iatom) = fb_atomic_halo_cost(atomic_halo, particle_set, qs_kind_set)
     189             :       END DO
     190          10 :       DEALLOCATE (pair_radii)
     191          10 :       CALL fb_atomic_halo_release(atomic_halo)
     192             : 
     193             :       ! build the preferred_procs_set according to DBCSR mat H
     194         110 :       ALLOCATE (preferred_procs_set(natoms))
     195          30 :       ALLOCATE (common_set_ids(natoms))
     196             :       CALL fb_build_preferred_procs(mat_ks(1)%matrix, &
     197             :                                     natoms, &
     198             :                                     preferred_procs_set, &
     199             :                                     common_set_ids, &
     200          10 :                                     n_common_sets)
     201             : 
     202             :       ! for each atomic halo, construct distribution_element, and assign
     203             :       ! the element to a processors using preferred_procs_set in a
     204             :       ! round-robin manner
     205          50 :       ALLOCATE (dist(nprocs))
     206          30 :       DO ipe = 1, nprocs
     207          30 :          CALL fb_distribution_init(dist=dist(ipe))
     208             :       END DO
     209          30 :       ALLOCATE (pos_in_preferred_list(n_common_sets))
     210          20 :       pos_in_preferred_list(:) = 0
     211          90 :       DO iatom = 1, natoms
     212          80 :          element%id = iatom
     213          80 :          element%cost = cost_per_atom(iatom)
     214          80 :          i_common_set = common_set_ids(iatom)
     215             :          pos_in_preferred_list(i_common_set) = &
     216             :             MOD(pos_in_preferred_list(i_common_set), &
     217          80 :                 preferred_procs_set(iatom)%nprocs) + 1
     218          80 :          ipe = preferred_procs_set(iatom)%list(pos_in_preferred_list(i_common_set))
     219          90 :          CALL fb_distribution_add(dist(ipe), element)
     220             :       END DO
     221             : 
     222          10 :       DEALLOCATE (pos_in_preferred_list)
     223          10 :       DEALLOCATE (common_set_ids)
     224          10 :       DEALLOCATE (cost_per_atom)
     225             : 
     226             :       ! sort processors according to the overall cost of their assigned
     227             :       ! corresponding distribution
     228          30 :       ALLOCATE (cost_per_proc(nprocs))
     229          30 :       DO ipe = 1, nprocs
     230          30 :          cost_per_proc(ipe) = dist(ipe)%cost
     231             :       END DO
     232          30 :       ALLOCATE (pe(nprocs))
     233          10 :       CALL sort(cost_per_proc, nprocs, pe)
     234             :       ! now that cost_per_proc is sorted, ipe's no longer give mpi
     235             :       ! ranks, the correct one to use should be pe(ipe)
     236             : 
     237             :       ! work out the ideal average cost per proc if work load is evenly
     238             :       ! distributed
     239          30 :       average_cost = SUM(cost_per_proc)/REAL(nprocs, dp)
     240             : 
     241          10 :       DEALLOCATE (cost_per_proc)
     242             : 
     243             :       ! loop over the processors, starting with the highest cost, move
     244             :       ! atoms one by one:
     245             :       !   1. FIRST to the next processor in the preferred list that has
     246             :       !      cost below average. IF no such proc is found, THEN
     247             :       !   2. to the next procesor in the overall list that has cost
     248             :       !      below average.
     249             :       ! repeat until the cost on this processor is less than or equal
     250             :       ! to the average cost
     251          10 :       lowest_cost_ind = 1
     252          30 :       DO ipe = nprocs, 1, -1
     253          30 :          redistribute: DO WHILE (dist(pe(ipe))%cost .GT. average_cost)
     254           0 :             iatom = dist(pe(ipe))%list(lowest_cost_ind)%id
     255           0 :             move_happened = .FALSE.
     256             :             ! first try to move to a preferred process
     257           0 :             preferred: DO ii = 1, preferred_procs_set(iatom)%nprocs
     258           0 :                pref_pe = preferred_procs_set(iatom)%list(ii)
     259             :                acceptable_move = &
     260             :                   fb_distribution_acceptable_move(dist(pe(ipe)), &
     261             :                                                   dist(pe(ipe))%list(lowest_cost_ind), &
     262             :                                                   dist(pref_pe), &
     263           0 :                                                   average_cost)
     264           0 :                IF ((pref_pe .NE. pe(ipe)) .AND. acceptable_move) THEN
     265             :                   CALL fb_distribution_move(dist(pe(ipe)), &
     266             :                                             lowest_cost_ind, &
     267           0 :                                             dist(pref_pe))
     268             :                   move_happened = .TRUE.
     269             :                   EXIT preferred
     270             :                END IF
     271             :             END DO preferred
     272             :             ! if no preferred process is available, move to a proc in
     273             :             ! the sorted list that has cost less than average.  remember
     274             :             ! that some of the proc may have already taken redistributed
     275             :             ! atoms, and thus may become unavailable (full)
     276             :             IF (.NOT. move_happened) THEN
     277             :                ! searching from the proc with the least initial cost
     278           0 :                next_in_line: DO ii = 1, nprocs
     279             :                   acceptable_move = &
     280             :                      fb_distribution_acceptable_move(dist(pe(ipe)), &
     281             :                                                      dist(pe(ipe))%list(lowest_cost_ind), &
     282             :                                                      dist(pe(ii)), &
     283           0 :                                                      average_cost)
     284           0 :                   IF ((pe(ii) .NE. pe(ipe)) .AND. acceptable_move) THEN
     285             :                      CALL fb_distribution_move(dist(pe(ipe)), &
     286             :                                                lowest_cost_ind, &
     287           0 :                                                dist(pe(ii)))
     288           0 :                      move_happened = .TRUE.
     289           0 :                      EXIT next_in_line
     290             :                   END IF
     291             :                END DO next_in_line
     292             :             END IF
     293             :             ! if the atom cannot be moved, then this means it is too
     294             :             ! costly for all other processes to accept. When this
     295             :             ! happens we must stop the redistribution process for this
     296             :             ! processor---as all other of its atoms will be even more
     297             :             ! costly
     298          20 :             IF (.NOT. move_happened) THEN
     299             :                EXIT redistribute
     300             :             END IF
     301             :          END DO redistribute ! while
     302             :       END DO ! ipe
     303             : 
     304          10 :       DEALLOCATE (pe)
     305          90 :       DO ii = 1, SIZE(preferred_procs_set)
     306          90 :          CALL fb_preferred_procs_list_release(preferred_procs_set(ii))
     307             :       END DO
     308          10 :       DEALLOCATE (preferred_procs_set)
     309             : 
     310             :       ! generate local atoms from dist
     311          30 :       ALLOCATE (local_atoms_all(natoms))
     312          20 :       ALLOCATE (local_atoms_starts(nprocs))
     313          20 :       ALLOCATE (local_atoms_sizes(nprocs))
     314             :       CALL fb_distribution_to_local_atoms(dist, &
     315             :                                           local_atoms_all, &
     316             :                                           local_atoms_starts, &
     317          10 :                                           local_atoms_sizes)
     318          30 :       ALLOCATE (local_atoms(local_atoms_sizes(my_pe)))
     319          10 :       lb = local_atoms_starts(my_pe)
     320          10 :       ub = local_atoms_starts(my_pe) + local_atoms_sizes(my_pe) - 1
     321          50 :       local_atoms(1:local_atoms_sizes(my_pe)) = local_atoms_all(lb:ub)
     322             :       CALL fb_env_set(fb_env=fb_env, &
     323             :                       local_atoms=local_atoms, &
     324          10 :                       nlocal_atoms=local_atoms_sizes(my_pe))
     325             : 
     326             :       ! write out info
     327          10 :       CALL fb_distribution_write_info(dist, scf_section)
     328             : 
     329          10 :       DEALLOCATE (local_atoms_all)
     330          10 :       DEALLOCATE (local_atoms_starts)
     331          10 :       DEALLOCATE (local_atoms_sizes)
     332          30 :       DO ipe = 1, SIZE(dist)
     333          30 :          CALL fb_distribution_release(dist(ipe))
     334             :       END DO
     335          10 :       DEALLOCATE (dist)
     336             : 
     337          10 :       CALL timestop(handle)
     338             : 
     339          20 :    END SUBROUTINE fb_distribution_build
     340             : 
     341             : ! **************************************************************************************************
     342             : !> \brief Checks if moving an element from one distribution to another is
     343             : !>        allowed in mind of load balancing.
     344             : !> \param dist_from : the source distribution
     345             : !> \param element   : the element in source distribution considered for the
     346             : !>                    move
     347             : !> \param dist_to   : the destination distribution
     348             : !> \param threshold ...
     349             : !> \return : TRUE or FALSE
     350             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     351             : ! **************************************************************************************************
     352           0 :    PURE FUNCTION fb_distribution_acceptable_move(dist_from, &
     353             :                                                  element, &
     354             :                                                  dist_to, &
     355             :                                                  threshold) &
     356             :       RESULT(acceptable)
     357             :       TYPE(fb_distribution_list), INTENT(IN)             :: dist_from
     358             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     359             :       TYPE(fb_distribution_list), INTENT(IN)             :: dist_to
     360             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     361             :       LOGICAL                                            :: acceptable
     362             : 
     363             :       acceptable = (dist_to%cost + element%cost .LT. dist_from%cost) .AND. &
     364           0 :                    (dist_to%cost .LT. threshold)
     365           0 :    END FUNCTION fb_distribution_acceptable_move
     366             : 
     367             : ! **************************************************************************************************
     368             : !> \brief Write out information on the load distribution on processors
     369             : !> \param dist_set    : set of distributions for the processors
     370             : !> \param scf_section : SCF input section
     371             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     372             : ! **************************************************************************************************
     373          10 :    SUBROUTINE fb_distribution_write_info(dist_set, scf_section)
     374             :       TYPE(fb_distribution_list), DIMENSION(:), &
     375             :          INTENT(IN)                                      :: dist_set
     376             :       TYPE(section_vals_type), POINTER                   :: scf_section
     377             : 
     378             :       INTEGER                                            :: ii, max_natoms, min_natoms, natoms, &
     379             :                                                             nprocs, unit_nr
     380             :       REAL(KIND=dp)                                      :: ave_cost, ave_natoms, max_cost, &
     381             :                                                             min_cost, total_cost
     382             :       TYPE(cp_logger_type), POINTER                      :: logger
     383             : 
     384          10 :       nprocs = SIZE(dist_set)
     385          10 :       natoms = 0
     386          10 :       total_cost = 0.0_dp
     387          30 :       DO ii = 1, nprocs
     388          20 :          natoms = natoms + dist_set(ii)%nelements
     389          30 :          total_cost = total_cost + dist_set(ii)%cost
     390             :       END DO
     391          10 :       ave_natoms = REAL(natoms, dp)/REAL(nprocs, dp)
     392          10 :       ave_cost = total_cost/REAL(nprocs, dp)
     393          10 :       max_natoms = 0
     394          10 :       max_cost = 0._dp
     395          30 :       DO ii = 1, nprocs
     396          20 :          max_natoms = MAX(max_natoms, dist_set(ii)%nelements)
     397          30 :          max_cost = MAX(max_cost, dist_set(ii)%cost)
     398             :       END DO
     399          10 :       min_natoms = natoms
     400          10 :       min_cost = total_cost
     401          30 :       DO ii = 1, nprocs
     402          20 :          min_natoms = MIN(min_natoms, dist_set(ii)%nelements)
     403          30 :          min_cost = MIN(min_cost, dist_set(ii)%cost)
     404             :       END DO
     405             : 
     406          10 :       logger => cp_get_default_logger()
     407             :       unit_nr = cp_print_key_unit_nr(logger, scf_section, &
     408             :                                      "PRINT%FILTER_MATRIX", &
     409          10 :                                      extension="")
     410             : 
     411          10 :       IF (unit_nr > 0) THEN
     412             :          WRITE (UNIT=unit_nr, FMT="(/,A,I6,A)") &
     413           5 :             " FILTER_MAT_DIAG| Load distribution across ", nprocs, " processors:"
     414             :          WRITE (UNIT=unit_nr, &
     415             :                 FMT="(A,T40,A,T55,A,T70,A,T85,A)") &
     416           5 :             " FILTER_MAT_DIAG| ", "Total", "Average", "Max", "Min"
     417             :          WRITE (UNIT=unit_nr, &
     418             :                 FMT="(A,T40,I12,T55,F12.1,T70,I12,T85,I10)") &
     419           5 :             " FILTER_MAT_DIAG|   Atomic Matrices", &
     420          10 :             natoms, ave_natoms, max_natoms, min_natoms
     421             :          WRITE (UNIT=unit_nr, &
     422             :                 FMT="(A,T40,D12.7,T55,D12.7,T70,D12.7,T85,D12.7)") &
     423           5 :             " FILTER_MAT_DIAG|   Cost*", &
     424          10 :             total_cost, ave_cost, max_cost, min_cost
     425             :          WRITE (UNIT=unit_nr, FMT="(A)") &
     426           5 :             " FILTER_MAT_DIAG| (* cost is calculated as sum of cube of atomic matrix sizes)"
     427             :       END IF
     428             :       CALL cp_print_key_finished_output(unit_nr, logger, scf_section, &
     429          10 :                                         "PRINT%FILTER_MATRIX")
     430          10 :    END SUBROUTINE fb_distribution_write_info
     431             : 
     432             : ! **************************************************************************************************
     433             : !> \brief Build the preferred list of processors for atoms
     434             : !> \param dbcsr_mat   : the reference DBCSR matrix, from which the local block
     435             : !>                      cols and the processor maps are obtained
     436             : !> \param natoms      : total number of atoms globally
     437             : !> \param preferred_procs_set : set of preferred procs list for each atom
     438             : !> \param common_set_ids : atoms (block cols) local to the same processor grid
     439             : !>                         col will have the same preferred list. This list
     440             : !>                         maps each atom to their corresponding group
     441             : !> \param n_common_sets  : number of unique preferred lists (groups)
     442             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     443             : ! **************************************************************************************************
     444          40 :    SUBROUTINE fb_build_preferred_procs(dbcsr_mat, &
     445             :                                        natoms, &
     446          10 :                                        preferred_procs_set, &
     447          10 :                                        common_set_ids, &
     448             :                                        n_common_sets)
     449             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     450             :       INTEGER, INTENT(IN)                                :: natoms
     451             :       TYPE(fb_preferred_procs_list), DIMENSION(:), &
     452             :          INTENT(INOUT)                                   :: preferred_procs_set
     453             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: common_set_ids
     454             :       INTEGER, INTENT(OUT)                               :: n_common_sets
     455             : 
     456             :       INTEGER                                            :: icol, nblkcols_tot, nprows, pcol, prow
     457          10 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist
     458          10 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     459             :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
     460             : 
     461          10 :       CALL dbcsr_get_info(dbcsr_mat, nblkcols_total=nblkcols_tot)
     462          10 :       CPASSERT(natoms <= nblkcols_tot)
     463          10 :       CPASSERT(SIZE(preferred_procs_set) >= natoms)
     464          10 :       CPASSERT(SIZE(common_set_ids) >= natoms)
     465             : 
     466          10 :       CALL dbcsr_get_info(dbcsr_mat, distribution=dbcsr_dist, proc_col_dist=col_dist)
     467          10 :       CALL dbcsr_distribution_get(dbcsr_dist, pgrid=pgrid, nprows=nprows, npcols=n_common_sets)
     468             : 
     469          90 :       DO icol = 1, natoms
     470          80 :          IF (ASSOCIATED(preferred_procs_set(icol)%list)) THEN
     471           0 :             DEALLOCATE (preferred_procs_set(icol)%list)
     472             :          END IF
     473         240 :          ALLOCATE (preferred_procs_set(icol)%list(nprows))
     474          80 :          pcol = col_dist(icol)
     475             :          ! dbcsr prow and pcol counts from 0
     476         240 :          DO prow = 0, nprows - 1
     477             :             ! here, we count processes from 1, so +1 from mpirank
     478         240 :             preferred_procs_set(icol)%list(prow + 1) = pgrid(prow, pcol) + 1
     479             :          END DO
     480          90 :          preferred_procs_set(icol)%nprocs = nprows
     481             :       END DO
     482             : 
     483          90 :       common_set_ids(:) = 0
     484          90 :       common_set_ids(1:natoms) = col_dist(1:natoms) + 1
     485             : 
     486          10 :    END SUBROUTINE fb_build_preferred_procs
     487             : 
     488             : ! **************************************************************************************************
     489             : !> \brief Release a preferred_procs_list
     490             : !> \param preferred_procs_list  : the preferred procs list in question
     491             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     492             : ! **************************************************************************************************
     493          80 :    SUBROUTINE fb_preferred_procs_list_release(preferred_procs_list)
     494             :       TYPE(fb_preferred_procs_list), INTENT(INOUT)       :: preferred_procs_list
     495             : 
     496          80 :       IF (ASSOCIATED(preferred_procs_list%list)) THEN
     497          80 :          DEALLOCATE (preferred_procs_list%list)
     498             :       END IF
     499          80 :    END SUBROUTINE fb_preferred_procs_list_release
     500             : 
     501             : ! **************************************************************************************************
     502             : !> \brief Convert distribution data to 1D array containing information of
     503             : !>        which atoms are distributed to which processor
     504             : !> \param dist_set    : set of distributions for the processors
     505             : !> \param local_atoms : continuous array of atoms arranged in order
     506             : !>                      corresponding their allocated processors
     507             : !> \param local_atoms_starts : starting position in local_atoms array for
     508             : !>                             each processor
     509             : !> \param local_atoms_sizes  : number of atoms local to each processor
     510             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     511             : ! **************************************************************************************************
     512          10 :    SUBROUTINE fb_distribution_to_local_atoms(dist_set, &
     513          20 :                                              local_atoms, &
     514          10 :                                              local_atoms_starts, &
     515          10 :                                              local_atoms_sizes)
     516             :       TYPE(fb_distribution_list), DIMENSION(:), &
     517             :          INTENT(IN)                                      :: dist_set
     518             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: local_atoms, local_atoms_starts, &
     519             :                                                             local_atoms_sizes
     520             : 
     521             :       INTEGER                                            :: iatom, ipe, n_procs, pos
     522             :       LOGICAL                                            :: check_ok
     523             : 
     524          10 :       n_procs = SIZE(dist_set)
     525             : 
     526          10 :       check_ok = SIZE(local_atoms_starts) .GE. n_procs
     527          10 :       CPASSERT(check_ok)
     528          10 :       check_ok = SIZE(local_atoms_sizes) .GE. n_procs
     529          10 :       CPASSERT(check_ok)
     530             : 
     531          90 :       local_atoms(:) = 0
     532          30 :       local_atoms_starts(:) = 0
     533          30 :       local_atoms_sizes(:) = 0
     534             : 
     535             :       pos = 1
     536          30 :       DO ipe = 1, n_procs
     537          20 :          local_atoms_starts(ipe) = pos
     538         110 :          DO iatom = 1, dist_set(ipe)%nelements
     539          80 :             local_atoms(pos) = dist_set(ipe)%list(iatom)%id
     540          80 :             pos = pos + 1
     541         100 :             local_atoms_sizes(ipe) = local_atoms_sizes(ipe) + 1
     542             :          END DO
     543             :       END DO
     544          10 :    END SUBROUTINE fb_distribution_to_local_atoms
     545             : 
     546             : ! **************************************************************************************************
     547             : !> \brief Initialise a distribution
     548             : !> \param dist        : the distribution in question
     549             : !> \param nmax        : [OPTIONAL] size of the list array to be allocated
     550             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     551             : ! **************************************************************************************************
     552          20 :    SUBROUTINE fb_distribution_init(dist, nmax)
     553             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     554             :       INTEGER, INTENT(IN), OPTIONAL                      :: nmax
     555             : 
     556             :       INTEGER                                            :: ii, my_nmax
     557             : 
     558          20 :       my_nmax = 0
     559          20 :       IF (PRESENT(nmax)) my_nmax = nmax
     560          20 :       IF (ASSOCIATED(dist%list)) THEN
     561           0 :          DEALLOCATE (dist%list)
     562             :       END IF
     563          20 :       NULLIFY (dist%list)
     564          20 :       IF (my_nmax .GT. 0) THEN
     565           0 :          ALLOCATE (dist%list(my_nmax))
     566           0 :          DO ii = 1, SIZE(dist%list)
     567           0 :             dist%list(ii)%id = 0
     568           0 :             dist%list(ii)%cost = 0.0_dp
     569             :          END DO
     570             :       END IF
     571          20 :       dist%nelements = 0
     572          20 :       dist%cost = 0.0_dp
     573          20 :    END SUBROUTINE fb_distribution_init
     574             : 
     575             : ! **************************************************************************************************
     576             : !> \brief Resize the list array in a distribution
     577             : !> \param dist        : The distribution in question
     578             : !> \param nmax        : new size of the list array
     579             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     580             : ! **************************************************************************************************
     581          60 :    SUBROUTINE fb_distribution_resize(dist, nmax)
     582             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     583             :       INTEGER, INTENT(IN)                                :: nmax
     584             : 
     585             :       INTEGER                                            :: ii, my_nmax
     586             :       TYPE(fb_distribution_element), DIMENSION(:), &
     587          60 :          POINTER                                         :: new_list
     588             : 
     589          60 :       IF (.NOT. ASSOCIATED(dist%list)) THEN
     590          20 :          my_nmax = MAX(nmax, 1)
     591          80 :          ALLOCATE (dist%list(my_nmax))
     592             :       ELSE
     593          40 :          my_nmax = MAX(nmax, dist%nelements)
     594         240 :          ALLOCATE (new_list(my_nmax))
     595         160 :          DO ii = 1, SIZE(new_list)
     596         120 :             new_list(ii)%id = 0
     597         160 :             new_list(ii)%cost = 0.0_dp
     598             :          END DO
     599         100 :          DO ii = 1, dist%nelements
     600         100 :             new_list(ii) = dist%list(ii)
     601             :          END DO
     602          40 :          DEALLOCATE (dist%list)
     603          40 :          dist%list => new_list
     604             :       END IF
     605          60 :    END SUBROUTINE fb_distribution_resize
     606             : 
     607             : ! **************************************************************************************************
     608             : !> \brief Add an atom (element) to a distribution
     609             : !> \param dist        : the distribution in question
     610             : !> \param element     : the element to be added
     611             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     612             : ! **************************************************************************************************
     613          80 :    SUBROUTINE fb_distribution_add(dist, element)
     614             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     615             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     616             : 
     617             :       INTEGER                                            :: ii, new_nelements, pos
     618             : 
     619          80 :       new_nelements = dist%nelements + 1
     620             : 
     621             :       ! resize list if necessary
     622          80 :       IF (.NOT. ASSOCIATED(dist%list)) THEN
     623          20 :          CALL fb_distribution_resize(dist, new_nelements)
     624          60 :       ELSE IF (new_nelements*ENLARGE_RATIO .GT. SIZE(dist%list)) THEN
     625          40 :          CALL fb_distribution_resize(dist, SIZE(dist%list)*EXPAND_FACTOR)
     626             :       END IF
     627             :       ! assuming the list of elements is always sorted with respect to cost
     628             :       ! slot the new element into the appropriate spot
     629          80 :       IF (new_nelements == 1) THEN
     630          20 :          dist%list(1) = element
     631             :       ELSE
     632          60 :          pos = fb_distribution_find_slot(dist, element)
     633          60 :          DO ii = dist%nelements, pos, -1
     634          60 :             dist%list(ii + 1) = dist%list(ii)
     635             :          END DO
     636          60 :          dist%list(pos) = element
     637             :       END IF
     638          80 :       dist%nelements = new_nelements
     639          80 :       dist%cost = dist%cost + element%cost
     640          80 :    END SUBROUTINE fb_distribution_add
     641             : 
     642             : ! **************************************************************************************************
     643             : !> \brief Find the correct slot in the list array to add a new element, so that
     644             : !>        the list will always be ordered with respect to cost
     645             : !> \param dist        : the distribution in question
     646             : !> \param element     : element to be added
     647             : !> \return : the correct position to add the new element
     648             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     649             : ! **************************************************************************************************
     650          60 :    PURE FUNCTION fb_distribution_find_slot(dist, element) RESULT(pos)
     651             :       TYPE(fb_distribution_list), INTENT(IN)             :: dist
     652             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     653             :       INTEGER                                            :: pos
     654             : 
     655             :       INTEGER                                            :: lower, middle, N, upper
     656             : 
     657          60 :       N = dist%nelements
     658          60 :       IF (element%cost .LT. dist%list(1)%cost) THEN
     659          60 :          pos = 1
     660             :          RETURN
     661             :       END IF
     662          60 :       IF (element%cost .GE. dist%list(N)%cost) THEN
     663          60 :          pos = N + 1
     664          60 :          RETURN
     665             :       END IF
     666             :       lower = 1
     667             :       upper = N
     668           0 :       DO WHILE ((upper - lower) .GT. 1)
     669           0 :          middle = (lower + upper)/2
     670           0 :          IF (element%cost .LT. dist%list(middle)%cost) THEN
     671             :             upper = middle
     672             :          ELSE
     673           0 :             lower = middle
     674             :          END IF
     675             :       END DO
     676          60 :       pos = upper
     677             :    END FUNCTION fb_distribution_find_slot
     678             : 
     679             : ! **************************************************************************************************
     680             : !> \brief Remove the pos-th element from a distribution
     681             : !> \param dist        : the distribution in question
     682             : !> \param pos         : index of the element in the list array
     683             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     684             : ! **************************************************************************************************
     685           0 :    SUBROUTINE fb_distribution_remove_ind(dist, pos)
     686             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     687             :       INTEGER, INTENT(IN)                                :: pos
     688             : 
     689             :       INTEGER                                            :: ii
     690             :       LOGICAL                                            :: check_ok
     691             : 
     692           0 :       check_ok = pos .GT. 0
     693           0 :       CPASSERT(check_ok)
     694           0 :       IF (pos .LE. dist%nelements) THEN
     695           0 :          dist%cost = dist%cost - dist%list(pos)%cost
     696           0 :          DO ii = pos, dist%nelements - 1
     697           0 :             dist%list(ii) = dist%list(ii + 1)
     698             :          END DO
     699           0 :          dist%list(dist%nelements)%id = 0
     700           0 :          dist%list(dist%nelements)%cost = 0.0_dp
     701           0 :          dist%nelements = dist%nelements - 1
     702             :          ! auto resize if required
     703           0 :          IF (dist%nelements*REDUCE_RATIO .LT. SIZE(dist%list)) THEN
     704           0 :             CALL fb_distribution_resize(dist, dist%nelements/SHRINK_FACTOR)
     705             :          END IF
     706             :       END IF
     707           0 :    END SUBROUTINE fb_distribution_remove_ind
     708             : 
     709             : ! **************************************************************************************************
     710             : !> \brief Remove a given element from a distribution
     711             : !> \param dist        : the distribution in question
     712             : !> \param element     : the element in question
     713             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     714             : ! **************************************************************************************************
     715           0 :    SUBROUTINE fb_distribution_remove_el(dist, element)
     716             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     717             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     718             : 
     719             :       INTEGER                                            :: ii, pos
     720             : 
     721           0 :       pos = dist%nelements + 1
     722           0 :       DO ii = 1, dist%nelements
     723           0 :          IF (element%id == dist%list(ii)%id) THEN
     724           0 :             pos = ii
     725           0 :             EXIT
     726             :          END IF
     727             :       END DO
     728           0 :       CALL fb_distribution_remove_ind(dist, pos)
     729           0 :    END SUBROUTINE fb_distribution_remove_el
     730             : 
     731             : ! **************************************************************************************************
     732             : !> \brief Move the pos-th element from a distribution to another
     733             : !> \param dist_from   : the source distribution
     734             : !> \param pos         : index of the element in the source distribution
     735             : !> \param dist_to     : the destination distribution
     736             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     737             : ! **************************************************************************************************
     738           0 :    SUBROUTINE fb_distribution_move_ind(dist_from, pos, dist_to)
     739             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
     740             :       INTEGER, INTENT(IN)                                :: pos
     741             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
     742             : 
     743             :       LOGICAL                                            :: check_ok
     744             :       TYPE(fb_distribution_element)                      :: element
     745             : 
     746           0 :       check_ok = ASSOCIATED(dist_from%list)
     747           0 :       CPASSERT(check_ok)
     748           0 :       check_ok = pos .LE. dist_from%nelements
     749           0 :       CPASSERT(check_ok)
     750           0 :       element = dist_from%list(pos)
     751           0 :       CALL fb_distribution_add(dist_to, element)
     752           0 :       CALL fb_distribution_remove(dist_from, pos)
     753           0 :    END SUBROUTINE fb_distribution_move_ind
     754             : 
     755             : ! **************************************************************************************************
     756             : !> \brief Move a given element from a distribution to another
     757             : !> \param dist_from   : the source distribution
     758             : !> \param element     : the element in question
     759             : !> \param dist_to     : the destination distribution
     760             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     761             : ! **************************************************************************************************
     762           0 :    SUBROUTINE fb_distribution_move_el(dist_from, element, dist_to)
     763             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
     764             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     765             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
     766             : 
     767             :       LOGICAL                                            :: check_ok
     768             : 
     769           0 :       check_ok = ASSOCIATED(dist_from%list)
     770           0 :       CPASSERT(check_ok)
     771           0 :       CALL fb_distribution_add(dist_to, element)
     772           0 :       CALL fb_distribution_remove(dist_from, element)
     773           0 :    END SUBROUTINE fb_distribution_move_el
     774             : 
     775             : ! **************************************************************************************************
     776             : !> \brief Release a distribution
     777             : !> \param dist  : the distribution in question
     778             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     779             : ! **************************************************************************************************
     780          20 :    SUBROUTINE fb_distribution_release(dist)
     781             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     782             : 
     783          20 :       IF (ASSOCIATED(dist%list)) THEN
     784          20 :          DEALLOCATE (dist%list)
     785             :       END IF
     786          20 :    END SUBROUTINE fb_distribution_release
     787             : 
     788           0 : END MODULE qs_fb_distribution_methods

Generated by: LCOV version 1.15