LCOV - code coverage report
Current view: top level - src - qs_fb_distribution_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:2fce0f8) Lines: 210 274 76.6 %
Date: 2024-12-21 06:28:57 Functions: 10 18 55.6 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : MODULE qs_fb_distribution_methods
       9             : 
      10             :    USE cell_types,                      ONLY: cell_type
      11             :    USE cp_dbcsr_api,                    ONLY: dbcsr_distribution_get,&
      12             :                                               dbcsr_distribution_type,&
      13             :                                               dbcsr_get_info,&
      14             :                                               dbcsr_nblkcols_total,&
      15             :                                               dbcsr_p_type,&
      16             :                                               dbcsr_type
      17             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      18             :                                               cp_logger_type
      19             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      20             :                                               cp_print_key_unit_nr
      21             :    USE input_section_types,             ONLY: section_vals_type
      22             :    USE kinds,                           ONLY: dp
      23             :    USE message_passing,                 ONLY: mp_para_env_type
      24             :    USE particle_types,                  ONLY: particle_type
      25             :    USE qs_environment_types,            ONLY: get_qs_env,&
      26             :                                               qs_environment_type
      27             :    USE qs_fb_atomic_halo_types,         ONLY: &
      28             :         fb_atomic_halo_build_halo_atoms, fb_atomic_halo_cost, fb_atomic_halo_create, &
      29             :         fb_atomic_halo_init, fb_atomic_halo_nullify, fb_atomic_halo_obj, fb_atomic_halo_release, &
      30             :         fb_atomic_halo_set, fb_build_pair_radii
      31             :    USE qs_fb_env_types,                 ONLY: fb_env_get,&
      32             :                                               fb_env_obj,&
      33             :                                               fb_env_set
      34             :    USE qs_kind_types,                   ONLY: qs_kind_type
      35             :    USE util,                            ONLY: sort
      36             : #include "./base/base_uses.f90"
      37             : 
      38             :    IMPLICIT NONE
      39             : 
      40             :    PRIVATE
      41             : 
      42             :    PUBLIC :: fb_distribution_build
      43             : 
      44             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_distribution_methods'
      45             : 
      46             : ! **************************************************************************************************
      47             : !> \brief derived type containing cost data used for process distribution
      48             : !> \param id               : global atomic index
      49             : !> \param cost             : computational cost for the atomic matrix associated
      50             : !>                           to this atom
      51             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      52             : ! **************************************************************************************************
      53             :    TYPE fb_distribution_element
      54             :       INTEGER :: id = -1
      55             :       REAL(KIND=dp) :: cost = -1.0_dp
      56             :    END TYPE fb_distribution_element
      57             : 
      58             : ! **************************************************************************************************
      59             : !> \brief derived type containing the list of atoms currently allocated to a
      60             : !>        processor
      61             : !> \param list             : list of atoms and their associated costs
      62             : !> \param cost             : total cost of the list
      63             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      64             : ! **************************************************************************************************
      65             :    TYPE fb_distribution_list
      66             :       TYPE(fb_distribution_element), DIMENSION(:), POINTER :: list => NULL()
      67             :       INTEGER :: nelements = -1
      68             :       REAL(KIND=dp) :: cost = -1.0_dp
      69             :    END TYPE fb_distribution_list
      70             : 
      71             : ! **************************************************************************************************
      72             : !> \brief In filter matrix algorithm, each atomic matrix contributes to a
      73             : !>        column in the filter matrix, which is stored in DBCSR format.
      74             : !>        When distributing the atoms (and hence the atomic matrics) to the
      75             : !>        processors, we want the processors to have atoms that would be
      76             : !>        correspond to the block columns in the DBCSR format local to them.
      77             : !>        This derived type stores this information. For each atom, it
      78             : !>        corresponds to a DBCSR block column, and the list of processors
      79             : !>        in the 2D processor grid responsible for this column will be the
      80             : !>        preferred processors for this atom.
      81             : !> \param list             : list of preferred processors for an atom
      82             : !>                           note that here the processors are indexed from
      83             : !>                           1, i.e. = MPI_RANK+1
      84             : !> \param nprocs           : number of processors in the list
      85             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      86             : ! **************************************************************************************************
      87             :    TYPE fb_preferred_procs_list
      88             :       INTEGER, DIMENSION(:), POINTER :: list => NULL()
      89             :       INTEGER :: nprocs = -1
      90             :    END TYPE fb_preferred_procs_list
      91             : 
      92             : ! Parameters related to automatic resizing of the hash_table:
      93             : ! Resize by EXPAND_FACTOR if total no. slots / no. of filled slots < ENLARGE_RATIO
      94             :    INTEGER, PARAMETER, PRIVATE :: ENLARGE_RATIO = 1
      95             :    INTEGER, PARAMETER, PRIVATE :: REDUCE_RATIO = 3
      96             :    INTEGER, PARAMETER, PRIVATE :: EXPAND_FACTOR = 2
      97             :    INTEGER, PARAMETER, PRIVATE :: SHRINK_FACTOR = 2
      98             : 
      99             :    INTERFACE fb_distribution_remove
     100             :       MODULE PROCEDURE fb_distribution_remove_ind, &
     101             :          fb_distribution_remove_el
     102             :    END INTERFACE fb_distribution_remove
     103             : 
     104             :    INTERFACE fb_distribution_move
     105             :       MODULE PROCEDURE fb_distribution_move_ind, &
     106             :          fb_distribution_move_el
     107             :    END INTERFACE fb_distribution_move
     108             : 
     109             : CONTAINS
     110             : 
     111             : ! **************************************************************************************************
     112             : !> \brief Build local atoms associated to filter matrix algorithm for each
     113             : !>        MPI process, trying to balance the load for calculating the
     114             : !>        filter matrix
     115             : !> \param fb_env : the filter matrix environment
     116             : !> \param qs_env : quickstep environment
     117             : !> \param scf_section : SCF input section
     118             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     119             : ! **************************************************************************************************
     120          10 :    SUBROUTINE fb_distribution_build(fb_env, qs_env, scf_section)
     121             :       TYPE(fb_env_obj), INTENT(INOUT)                    :: fb_env
     122             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     123             :       TYPE(section_vals_type), POINTER                   :: scf_section
     124             : 
     125             :       CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_build'
     126             : 
     127             :       INTEGER :: handle, i_common_set, iatom, ii, ipe, lb, lowest_cost_ind, my_pe, n_common_sets, &
     128             :          natoms, nhalo_atoms, nkinds, nprocs, owner_id_in_halo, pref_pe, ub
     129          10 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: common_set_ids, local_atoms_all, &
     130          10 :                                                             local_atoms_sizes, local_atoms_starts, &
     131          10 :                                                             pe, pos_in_preferred_list
     132          10 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms, local_atoms
     133             :       LOGICAL                                            :: acceptable_move, move_happened
     134             :       REAL(KIND=dp)                                      :: average_cost
     135          10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cost_per_atom, cost_per_proc
     136             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: pair_radii
     137          10 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: rcut
     138             :       TYPE(cell_type), POINTER                           :: cell
     139          10 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_ks
     140             :       TYPE(fb_atomic_halo_obj)                           :: atomic_halo
     141             :       TYPE(fb_distribution_element)                      :: element
     142             :       TYPE(fb_distribution_list), ALLOCATABLE, &
     143          10 :          DIMENSION(:)                                    :: dist
     144             :       TYPE(fb_preferred_procs_list), ALLOCATABLE, &
     145          10 :          DIMENSION(:)                                    :: preferred_procs_set
     146             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     147          10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     148          10 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     149             : 
     150          10 :       CALL timeset(routineN, handle)
     151             : 
     152          10 :       NULLIFY (mat_ks, rcut, cell, para_env, particle_set, qs_kind_set, &
     153          10 :                halo_atoms, local_atoms)
     154          10 :       CALL fb_atomic_halo_nullify(atomic_halo)
     155             : 
     156             :       ! obtain relevant data from fb_env, qs_env
     157             :       CALL fb_env_get(fb_env=fb_env, &
     158          10 :                       rcut=rcut)
     159             :       CALL get_qs_env(qs_env=qs_env, &
     160             :                       natom=natoms, &
     161             :                       particle_set=particle_set, &
     162             :                       qs_kind_set=qs_kind_set, &
     163             :                       nkind=nkinds, &
     164             :                       cell=cell, &
     165             :                       para_env=para_env, &
     166          10 :                       matrix_ks=mat_ks)
     167          10 :       nprocs = para_env%num_pe
     168          10 :       my_pe = para_env%mepos + 1 ! counting from 1
     169             : 
     170             :       ! for each global atom, build atomic halo and get the associated cost
     171          40 :       ALLOCATE (pair_radii(nkinds, nkinds))
     172          10 :       CALL fb_build_pair_radii(rcut, nkinds, pair_radii)
     173          10 :       CALL fb_atomic_halo_create(atomic_halo)
     174          30 :       ALLOCATE (cost_per_atom(natoms))
     175          90 :       DO iatom = 1, natoms
     176          80 :          CALL fb_atomic_halo_init(atomic_halo)
     177             :          CALL fb_atomic_halo_build_halo_atoms(iatom, &
     178             :                                               particle_set, &
     179             :                                               cell, &
     180             :                                               pair_radii, &
     181             :                                               halo_atoms, &
     182             :                                               nhalo_atoms, &
     183          80 :                                               owner_id_in_halo)
     184             :          CALL fb_atomic_halo_set(atomic_halo=atomic_halo, &
     185             :                                  owner_atom=iatom, &
     186             :                                  natoms=nhalo_atoms, &
     187          80 :                                  halo_atoms=halo_atoms)
     188          80 :          NULLIFY (halo_atoms)
     189         170 :          cost_per_atom(iatom) = fb_atomic_halo_cost(atomic_halo, particle_set, qs_kind_set)
     190             :       END DO
     191          10 :       DEALLOCATE (pair_radii)
     192          10 :       CALL fb_atomic_halo_release(atomic_halo)
     193             : 
     194             :       ! build the preferred_procs_set according to DBCSR mat H
     195         110 :       ALLOCATE (preferred_procs_set(natoms))
     196          30 :       ALLOCATE (common_set_ids(natoms))
     197             :       CALL fb_build_preferred_procs(mat_ks(1)%matrix, &
     198             :                                     natoms, &
     199             :                                     preferred_procs_set, &
     200             :                                     common_set_ids, &
     201          10 :                                     n_common_sets)
     202             : 
     203             :       ! for each atomic halo, construct distribution_element, and assign
     204             :       ! the element to a processors using preferred_procs_set in a
     205             :       ! round-robin manner
     206          50 :       ALLOCATE (dist(nprocs))
     207          30 :       DO ipe = 1, nprocs
     208          30 :          CALL fb_distribution_init(dist=dist(ipe))
     209             :       END DO
     210          30 :       ALLOCATE (pos_in_preferred_list(n_common_sets))
     211          20 :       pos_in_preferred_list(:) = 0
     212          90 :       DO iatom = 1, natoms
     213          80 :          element%id = iatom
     214          80 :          element%cost = cost_per_atom(iatom)
     215          80 :          i_common_set = common_set_ids(iatom)
     216             :          pos_in_preferred_list(i_common_set) = &
     217             :             MOD(pos_in_preferred_list(i_common_set), &
     218          80 :                 preferred_procs_set(iatom)%nprocs) + 1
     219          80 :          ipe = preferred_procs_set(iatom)%list(pos_in_preferred_list(i_common_set))
     220          90 :          CALL fb_distribution_add(dist(ipe), element)
     221             :       END DO
     222             : 
     223          10 :       DEALLOCATE (pos_in_preferred_list)
     224          10 :       DEALLOCATE (common_set_ids)
     225          10 :       DEALLOCATE (cost_per_atom)
     226             : 
     227             :       ! sort processors according to the overall cost of their assigned
     228             :       ! corresponding distribution
     229          30 :       ALLOCATE (cost_per_proc(nprocs))
     230          30 :       DO ipe = 1, nprocs
     231          30 :          cost_per_proc(ipe) = dist(ipe)%cost
     232             :       END DO
     233          30 :       ALLOCATE (pe(nprocs))
     234          10 :       CALL sort(cost_per_proc, nprocs, pe)
     235             :       ! now that cost_per_proc is sorted, ipe's no longer give mpi
     236             :       ! ranks, the correct one to use should be pe(ipe)
     237             : 
     238             :       ! work out the ideal average cost per proc if work load is evenly
     239             :       ! distributed
     240          30 :       average_cost = SUM(cost_per_proc)/REAL(nprocs, dp)
     241             : 
     242          10 :       DEALLOCATE (cost_per_proc)
     243             : 
     244             :       ! loop over the processors, starting with the highest cost, move
     245             :       ! atoms one by one:
     246             :       !   1. FIRST to the next processor in the preferred list that has
     247             :       !      cost below average. IF no such proc is found, THEN
     248             :       !   2. to the next procesor in the overall list that has cost
     249             :       !      below average.
     250             :       ! repeat until the cost on this processor is less than or equal
     251             :       ! to the average cost
     252          10 :       lowest_cost_ind = 1
     253          30 :       DO ipe = nprocs, 1, -1
     254          30 :          redistribute: DO WHILE (dist(pe(ipe))%cost .GT. average_cost)
     255           0 :             iatom = dist(pe(ipe))%list(lowest_cost_ind)%id
     256           0 :             move_happened = .FALSE.
     257             :             ! first try to move to a preferred process
     258           0 :             preferred: DO ii = 1, preferred_procs_set(iatom)%nprocs
     259           0 :                pref_pe = preferred_procs_set(iatom)%list(ii)
     260             :                acceptable_move = &
     261             :                   fb_distribution_acceptable_move(dist(pe(ipe)), &
     262             :                                                   dist(pe(ipe))%list(lowest_cost_ind), &
     263             :                                                   dist(pref_pe), &
     264           0 :                                                   average_cost)
     265           0 :                IF ((pref_pe .NE. pe(ipe)) .AND. acceptable_move) THEN
     266             :                   CALL fb_distribution_move(dist(pe(ipe)), &
     267             :                                             lowest_cost_ind, &
     268           0 :                                             dist(pref_pe))
     269             :                   move_happened = .TRUE.
     270             :                   EXIT preferred
     271             :                END IF
     272             :             END DO preferred
     273             :             ! if no preferred process is available, move to a proc in
     274             :             ! the sorted list that has cost less than average.  remember
     275             :             ! that some of the proc may have already taken redistributed
     276             :             ! atoms, and thus may become unavailable (full)
     277             :             IF (.NOT. move_happened) THEN
     278             :                ! searching from the proc with the least initial cost
     279           0 :                next_in_line: DO ii = 1, nprocs
     280             :                   acceptable_move = &
     281             :                      fb_distribution_acceptable_move(dist(pe(ipe)), &
     282             :                                                      dist(pe(ipe))%list(lowest_cost_ind), &
     283             :                                                      dist(pe(ii)), &
     284           0 :                                                      average_cost)
     285           0 :                   IF ((pe(ii) .NE. pe(ipe)) .AND. acceptable_move) THEN
     286             :                      CALL fb_distribution_move(dist(pe(ipe)), &
     287             :                                                lowest_cost_ind, &
     288           0 :                                                dist(pe(ii)))
     289           0 :                      move_happened = .TRUE.
     290           0 :                      EXIT next_in_line
     291             :                   END IF
     292             :                END DO next_in_line
     293             :             END IF
     294             :             ! if the atom cannot be moved, then this means it is too
     295             :             ! costly for all other processes to accept. When this
     296             :             ! happens we must stop the redistribution process for this
     297             :             ! processor---as all other of its atoms will be even more
     298             :             ! costly
     299          20 :             IF (.NOT. move_happened) THEN
     300             :                EXIT redistribute
     301             :             END IF
     302             :          END DO redistribute ! while
     303             :       END DO ! ipe
     304             : 
     305          10 :       DEALLOCATE (pe)
     306          90 :       DO ii = 1, SIZE(preferred_procs_set)
     307          90 :          CALL fb_preferred_procs_list_release(preferred_procs_set(ii))
     308             :       END DO
     309          10 :       DEALLOCATE (preferred_procs_set)
     310             : 
     311             :       ! generate local atoms from dist
     312          30 :       ALLOCATE (local_atoms_all(natoms))
     313          20 :       ALLOCATE (local_atoms_starts(nprocs))
     314          20 :       ALLOCATE (local_atoms_sizes(nprocs))
     315             :       CALL fb_distribution_to_local_atoms(dist, &
     316             :                                           local_atoms_all, &
     317             :                                           local_atoms_starts, &
     318          10 :                                           local_atoms_sizes)
     319          30 :       ALLOCATE (local_atoms(local_atoms_sizes(my_pe)))
     320          10 :       lb = local_atoms_starts(my_pe)
     321          10 :       ub = local_atoms_starts(my_pe) + local_atoms_sizes(my_pe) - 1
     322          50 :       local_atoms(1:local_atoms_sizes(my_pe)) = local_atoms_all(lb:ub)
     323             :       CALL fb_env_set(fb_env=fb_env, &
     324             :                       local_atoms=local_atoms, &
     325          10 :                       nlocal_atoms=local_atoms_sizes(my_pe))
     326             : 
     327             :       ! write out info
     328          10 :       CALL fb_distribution_write_info(dist, scf_section)
     329             : 
     330          10 :       DEALLOCATE (local_atoms_all)
     331          10 :       DEALLOCATE (local_atoms_starts)
     332          10 :       DEALLOCATE (local_atoms_sizes)
     333          30 :       DO ipe = 1, SIZE(dist)
     334          30 :          CALL fb_distribution_release(dist(ipe))
     335             :       END DO
     336          10 :       DEALLOCATE (dist)
     337             : 
     338          10 :       CALL timestop(handle)
     339             : 
     340          20 :    END SUBROUTINE fb_distribution_build
     341             : 
     342             : ! **************************************************************************************************
     343             : !> \brief Checks if moving an element from one distribution to another is
     344             : !>        allowed in mind of load balancing.
     345             : !> \param dist_from : the source distribution
     346             : !> \param element   : the element in source distribution considered for the
     347             : !>                    move
     348             : !> \param dist_to   : the destination distribution
     349             : !> \param threshold ...
     350             : !> \return : TRUE or FALSE
     351             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     352             : ! **************************************************************************************************
     353           0 :    PURE FUNCTION fb_distribution_acceptable_move(dist_from, &
     354             :                                                  element, &
     355             :                                                  dist_to, &
     356             :                                                  threshold) &
     357             :       RESULT(acceptable)
     358             :       TYPE(fb_distribution_list), INTENT(IN)             :: dist_from
     359             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     360             :       TYPE(fb_distribution_list), INTENT(IN)             :: dist_to
     361             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     362             :       LOGICAL                                            :: acceptable
     363             : 
     364             :       acceptable = (dist_to%cost + element%cost .LT. dist_from%cost) .AND. &
     365           0 :                    (dist_to%cost .LT. threshold)
     366           0 :    END FUNCTION fb_distribution_acceptable_move
     367             : 
     368             : ! **************************************************************************************************
     369             : !> \brief Write out information on the load distribution on processors
     370             : !> \param dist_set    : set of distributions for the processors
     371             : !> \param scf_section : SCF input section
     372             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     373             : ! **************************************************************************************************
     374          10 :    SUBROUTINE fb_distribution_write_info(dist_set, scf_section)
     375             :       TYPE(fb_distribution_list), DIMENSION(:), &
     376             :          INTENT(IN)                                      :: dist_set
     377             :       TYPE(section_vals_type), POINTER                   :: scf_section
     378             : 
     379             :       INTEGER                                            :: ii, max_natoms, min_natoms, natoms, &
     380             :                                                             nprocs, unit_nr
     381             :       REAL(KIND=dp)                                      :: ave_cost, ave_natoms, max_cost, &
     382             :                                                             min_cost, total_cost
     383             :       TYPE(cp_logger_type), POINTER                      :: logger
     384             : 
     385          10 :       nprocs = SIZE(dist_set)
     386          10 :       natoms = 0
     387          10 :       total_cost = 0.0_dp
     388          30 :       DO ii = 1, nprocs
     389          20 :          natoms = natoms + dist_set(ii)%nelements
     390          30 :          total_cost = total_cost + dist_set(ii)%cost
     391             :       END DO
     392          10 :       ave_natoms = REAL(natoms, dp)/REAL(nprocs, dp)
     393          10 :       ave_cost = total_cost/REAL(nprocs, dp)
     394          10 :       max_natoms = 0
     395          10 :       max_cost = 0._dp
     396          30 :       DO ii = 1, nprocs
     397          20 :          max_natoms = MAX(max_natoms, dist_set(ii)%nelements)
     398          30 :          max_cost = MAX(max_cost, dist_set(ii)%cost)
     399             :       END DO
     400          10 :       min_natoms = natoms
     401          10 :       min_cost = total_cost
     402          30 :       DO ii = 1, nprocs
     403          20 :          min_natoms = MIN(min_natoms, dist_set(ii)%nelements)
     404          30 :          min_cost = MIN(min_cost, dist_set(ii)%cost)
     405             :       END DO
     406             : 
     407          10 :       logger => cp_get_default_logger()
     408             :       unit_nr = cp_print_key_unit_nr(logger, scf_section, &
     409             :                                      "PRINT%FILTER_MATRIX", &
     410          10 :                                      extension="")
     411             : 
     412          10 :       IF (unit_nr > 0) THEN
     413             :          WRITE (UNIT=unit_nr, FMT="(/,A,I6,A)") &
     414           5 :             " FILTER_MAT_DIAG| Load distribution across ", nprocs, " processors:"
     415             :          WRITE (UNIT=unit_nr, &
     416             :                 FMT="(A,T40,A,T55,A,T70,A,T85,A)") &
     417           5 :             " FILTER_MAT_DIAG| ", "Total", "Average", "Max", "Min"
     418             :          WRITE (UNIT=unit_nr, &
     419             :                 FMT="(A,T40,I12,T55,F12.1,T70,I12,T85,I10)") &
     420           5 :             " FILTER_MAT_DIAG|   Atomic Matrices", &
     421          10 :             natoms, ave_natoms, max_natoms, min_natoms
     422             :          WRITE (UNIT=unit_nr, &
     423             :                 FMT="(A,T40,D12.7,T55,D12.7,T70,D12.7,T85,D12.7)") &
     424           5 :             " FILTER_MAT_DIAG|   Cost*", &
     425          10 :             total_cost, ave_cost, max_cost, min_cost
     426             :          WRITE (UNIT=unit_nr, FMT="(A)") &
     427           5 :             " FILTER_MAT_DIAG| (* cost is calculated as sum of cube of atomic matrix sizes)"
     428             :       END IF
     429             :       CALL cp_print_key_finished_output(unit_nr, logger, scf_section, &
     430          10 :                                         "PRINT%FILTER_MATRIX")
     431          10 :    END SUBROUTINE fb_distribution_write_info
     432             : 
     433             : ! **************************************************************************************************
     434             : !> \brief Build the preferred list of processors for atoms
     435             : !> \param dbcsr_mat   : the reference DBCSR matrix, from which the local block
     436             : !>                      cols and the processor maps are obtained
     437             : !> \param natoms      : total number of atoms globally
     438             : !> \param preferred_procs_set : set of preferred procs list for each atom
     439             : !> \param common_set_ids : atoms (block cols) local to the same processor grid
     440             : !>                         col will have the same preferred list. This list
     441             : !>                         maps each atom to their corresponding group
     442             : !> \param n_common_sets  : number of unique preferred lists (groups)
     443             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     444             : ! **************************************************************************************************
     445          30 :    SUBROUTINE fb_build_preferred_procs(dbcsr_mat, &
     446             :                                        natoms, &
     447          10 :                                        preferred_procs_set, &
     448          10 :                                        common_set_ids, &
     449             :                                        n_common_sets)
     450             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     451             :       INTEGER, INTENT(IN)                                :: natoms
     452             :       TYPE(fb_preferred_procs_list), DIMENSION(:), &
     453             :          INTENT(INOUT)                                   :: preferred_procs_set
     454             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: common_set_ids
     455             :       INTEGER, INTENT(OUT)                               :: n_common_sets
     456             : 
     457             :       INTEGER                                            :: icol, nprows, pcol, prow
     458          10 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist
     459          10 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     460             :       LOGICAL                                            :: check_ok
     461             :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
     462             : 
     463          10 :       check_ok = natoms .LE. dbcsr_nblkcols_total(dbcsr_mat)
     464           0 :       CPASSERT(check_ok)
     465          10 :       check_ok = SIZE(preferred_procs_set) .GE. natoms
     466          10 :       CPASSERT(check_ok)
     467          10 :       check_ok = SIZE(common_set_ids) .GE. natoms
     468          10 :       CPASSERT(check_ok)
     469             : 
     470          10 :       CALL dbcsr_get_info(dbcsr_mat, distribution=dbcsr_dist, proc_col_dist=col_dist)
     471          10 :       CALL dbcsr_distribution_get(dbcsr_dist, pgrid=pgrid, nprows=nprows, npcols=n_common_sets)
     472             : 
     473          90 :       DO icol = 1, natoms
     474          80 :          IF (ASSOCIATED(preferred_procs_set(icol)%list)) THEN
     475           0 :             DEALLOCATE (preferred_procs_set(icol)%list)
     476             :          END IF
     477         240 :          ALLOCATE (preferred_procs_set(icol)%list(nprows))
     478          80 :          pcol = col_dist(icol)
     479             :          ! dbcsr prow and pcol counts from 0
     480         240 :          DO prow = 0, nprows - 1
     481             :             ! here, we count processes from 1, so +1 from mpirank
     482         240 :             preferred_procs_set(icol)%list(prow + 1) = pgrid(prow, pcol) + 1
     483             :          END DO
     484          90 :          preferred_procs_set(icol)%nprocs = nprows
     485             :       END DO
     486             : 
     487          90 :       common_set_ids(:) = 0
     488          90 :       common_set_ids(1:natoms) = col_dist(1:natoms) + 1
     489             : 
     490          10 :    END SUBROUTINE fb_build_preferred_procs
     491             : 
     492             : ! **************************************************************************************************
     493             : !> \brief Release a preferred_procs_list
     494             : !> \param preferred_procs_list  : the preferred procs list in question
     495             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     496             : ! **************************************************************************************************
     497          80 :    SUBROUTINE fb_preferred_procs_list_release(preferred_procs_list)
     498             :       TYPE(fb_preferred_procs_list), INTENT(INOUT)       :: preferred_procs_list
     499             : 
     500          80 :       IF (ASSOCIATED(preferred_procs_list%list)) THEN
     501          80 :          DEALLOCATE (preferred_procs_list%list)
     502             :       END IF
     503          80 :    END SUBROUTINE fb_preferred_procs_list_release
     504             : 
     505             : ! **************************************************************************************************
     506             : !> \brief Convert distribution data to 1D array containing information of
     507             : !>        which atoms are distributed to which processor
     508             : !> \param dist_set    : set of distributions for the processors
     509             : !> \param local_atoms : continuous array of atoms arranged in order
     510             : !>                      corresponding their allocated processors
     511             : !> \param local_atoms_starts : starting position in local_atoms array for
     512             : !>                             each processor
     513             : !> \param local_atoms_sizes  : number of atoms local to each processor
     514             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     515             : ! **************************************************************************************************
     516          10 :    SUBROUTINE fb_distribution_to_local_atoms(dist_set, &
     517          20 :                                              local_atoms, &
     518          10 :                                              local_atoms_starts, &
     519          10 :                                              local_atoms_sizes)
     520             :       TYPE(fb_distribution_list), DIMENSION(:), &
     521             :          INTENT(IN)                                      :: dist_set
     522             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: local_atoms, local_atoms_starts, &
     523             :                                                             local_atoms_sizes
     524             : 
     525             :       INTEGER                                            :: iatom, ipe, n_procs, pos
     526             :       LOGICAL                                            :: check_ok
     527             : 
     528          10 :       n_procs = SIZE(dist_set)
     529             : 
     530          10 :       check_ok = SIZE(local_atoms_starts) .GE. n_procs
     531          10 :       CPASSERT(check_ok)
     532          10 :       check_ok = SIZE(local_atoms_sizes) .GE. n_procs
     533          10 :       CPASSERT(check_ok)
     534             : 
     535          90 :       local_atoms(:) = 0
     536          30 :       local_atoms_starts(:) = 0
     537          30 :       local_atoms_sizes(:) = 0
     538             : 
     539             :       pos = 1
     540          30 :       DO ipe = 1, n_procs
     541          20 :          local_atoms_starts(ipe) = pos
     542         110 :          DO iatom = 1, dist_set(ipe)%nelements
     543          80 :             local_atoms(pos) = dist_set(ipe)%list(iatom)%id
     544          80 :             pos = pos + 1
     545         100 :             local_atoms_sizes(ipe) = local_atoms_sizes(ipe) + 1
     546             :          END DO
     547             :       END DO
     548          10 :    END SUBROUTINE fb_distribution_to_local_atoms
     549             : 
     550             : ! **************************************************************************************************
     551             : !> \brief Initialise a distribution
     552             : !> \param dist        : the distribution in question
     553             : !> \param nmax        : [OPTIONAL] size of the list array to be allocated
     554             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     555             : ! **************************************************************************************************
     556          20 :    SUBROUTINE fb_distribution_init(dist, nmax)
     557             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     558             :       INTEGER, INTENT(IN), OPTIONAL                      :: nmax
     559             : 
     560             :       INTEGER                                            :: ii, my_nmax
     561             : 
     562          20 :       my_nmax = 0
     563          20 :       IF (PRESENT(nmax)) my_nmax = nmax
     564          20 :       IF (ASSOCIATED(dist%list)) THEN
     565           0 :          DEALLOCATE (dist%list)
     566             :       END IF
     567          20 :       NULLIFY (dist%list)
     568          20 :       IF (my_nmax .GT. 0) THEN
     569           0 :          ALLOCATE (dist%list(my_nmax))
     570           0 :          DO ii = 1, SIZE(dist%list)
     571           0 :             dist%list(ii)%id = 0
     572           0 :             dist%list(ii)%cost = 0.0_dp
     573             :          END DO
     574             :       END IF
     575          20 :       dist%nelements = 0
     576          20 :       dist%cost = 0.0_dp
     577          20 :    END SUBROUTINE fb_distribution_init
     578             : 
     579             : ! **************************************************************************************************
     580             : !> \brief Resize the list array in a distribution
     581             : !> \param dist        : The distribution in question
     582             : !> \param nmax        : new size of the list array
     583             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     584             : ! **************************************************************************************************
     585          60 :    SUBROUTINE fb_distribution_resize(dist, nmax)
     586             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     587             :       INTEGER, INTENT(IN)                                :: nmax
     588             : 
     589             :       INTEGER                                            :: ii, my_nmax
     590             :       TYPE(fb_distribution_element), DIMENSION(:), &
     591          60 :          POINTER                                         :: new_list
     592             : 
     593          60 :       IF (.NOT. ASSOCIATED(dist%list)) THEN
     594          20 :          my_nmax = MAX(nmax, 1)
     595          80 :          ALLOCATE (dist%list(my_nmax))
     596             :       ELSE
     597          40 :          my_nmax = MAX(nmax, dist%nelements)
     598         240 :          ALLOCATE (new_list(my_nmax))
     599         160 :          DO ii = 1, SIZE(new_list)
     600         120 :             new_list(ii)%id = 0
     601         160 :             new_list(ii)%cost = 0.0_dp
     602             :          END DO
     603         100 :          DO ii = 1, dist%nelements
     604         100 :             new_list(ii) = dist%list(ii)
     605             :          END DO
     606          40 :          DEALLOCATE (dist%list)
     607          40 :          dist%list => new_list
     608             :       END IF
     609          60 :    END SUBROUTINE fb_distribution_resize
     610             : 
     611             : ! **************************************************************************************************
     612             : !> \brief Add an atom (element) to a distribution
     613             : !> \param dist        : the distribution in question
     614             : !> \param element     : the element to be added
     615             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     616             : ! **************************************************************************************************
     617          80 :    SUBROUTINE fb_distribution_add(dist, element)
     618             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     619             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     620             : 
     621             :       INTEGER                                            :: ii, new_nelements, pos
     622             : 
     623          80 :       new_nelements = dist%nelements + 1
     624             : 
     625             :       ! resize list if necessary
     626          80 :       IF (.NOT. ASSOCIATED(dist%list)) THEN
     627          20 :          CALL fb_distribution_resize(dist, new_nelements)
     628          60 :       ELSE IF (new_nelements*ENLARGE_RATIO .GT. SIZE(dist%list)) THEN
     629          40 :          CALL fb_distribution_resize(dist, SIZE(dist%list)*EXPAND_FACTOR)
     630             :       END IF
     631             :       ! assuming the list of elements is always sorted with respect to cost
     632             :       ! slot the new element into the appropriate spot
     633          80 :       IF (new_nelements == 1) THEN
     634          20 :          dist%list(1) = element
     635             :       ELSE
     636          60 :          pos = fb_distribution_find_slot(dist, element)
     637          60 :          DO ii = dist%nelements, pos, -1
     638          60 :             dist%list(ii + 1) = dist%list(ii)
     639             :          END DO
     640          60 :          dist%list(pos) = element
     641             :       END IF
     642          80 :       dist%nelements = new_nelements
     643          80 :       dist%cost = dist%cost + element%cost
     644          80 :    END SUBROUTINE fb_distribution_add
     645             : 
     646             : ! **************************************************************************************************
     647             : !> \brief Find the correct slot in the list array to add a new element, so that
     648             : !>        the list will always be ordered with respect to cost
     649             : !> \param dist        : the distribution in question
     650             : !> \param element     : element to be added
     651             : !> \return : the correct position to add the new element
     652             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     653             : ! **************************************************************************************************
     654          60 :    PURE FUNCTION fb_distribution_find_slot(dist, element) RESULT(pos)
     655             :       TYPE(fb_distribution_list), INTENT(IN)             :: dist
     656             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     657             :       INTEGER                                            :: pos
     658             : 
     659             :       INTEGER                                            :: lower, middle, N, upper
     660             : 
     661          60 :       N = dist%nelements
     662          60 :       IF (element%cost .LT. dist%list(1)%cost) THEN
     663          60 :          pos = 1
     664             :          RETURN
     665             :       END IF
     666          60 :       IF (element%cost .GE. dist%list(N)%cost) THEN
     667          60 :          pos = N + 1
     668          60 :          RETURN
     669             :       END IF
     670             :       lower = 1
     671             :       upper = N
     672           0 :       DO WHILE ((upper - lower) .GT. 1)
     673           0 :          middle = (lower + upper)/2
     674           0 :          IF (element%cost .LT. dist%list(middle)%cost) THEN
     675             :             upper = middle
     676             :          ELSE
     677           0 :             lower = middle
     678             :          END IF
     679             :       END DO
     680          60 :       pos = upper
     681             :    END FUNCTION fb_distribution_find_slot
     682             : 
     683             : ! **************************************************************************************************
     684             : !> \brief Remove the pos-th element from a distribution
     685             : !> \param dist        : the distribution in question
     686             : !> \param pos         : index of the element in the list array
     687             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     688             : ! **************************************************************************************************
     689           0 :    SUBROUTINE fb_distribution_remove_ind(dist, pos)
     690             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     691             :       INTEGER, INTENT(IN)                                :: pos
     692             : 
     693             :       INTEGER                                            :: ii
     694             :       LOGICAL                                            :: check_ok
     695             : 
     696           0 :       check_ok = pos .GT. 0
     697           0 :       CPASSERT(check_ok)
     698           0 :       IF (pos .LE. dist%nelements) THEN
     699           0 :          dist%cost = dist%cost - dist%list(pos)%cost
     700           0 :          DO ii = pos, dist%nelements - 1
     701           0 :             dist%list(ii) = dist%list(ii + 1)
     702             :          END DO
     703           0 :          dist%list(dist%nelements)%id = 0
     704           0 :          dist%list(dist%nelements)%cost = 0.0_dp
     705           0 :          dist%nelements = dist%nelements - 1
     706             :          ! auto resize if required
     707           0 :          IF (dist%nelements*REDUCE_RATIO .LT. SIZE(dist%list)) THEN
     708           0 :             CALL fb_distribution_resize(dist, dist%nelements/SHRINK_FACTOR)
     709             :          END IF
     710             :       END IF
     711           0 :    END SUBROUTINE fb_distribution_remove_ind
     712             : 
     713             : ! **************************************************************************************************
     714             : !> \brief Remove a given element from a distribution
     715             : !> \param dist        : the distribution in question
     716             : !> \param element     : the element in question
     717             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     718             : ! **************************************************************************************************
     719           0 :    SUBROUTINE fb_distribution_remove_el(dist, element)
     720             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     721             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     722             : 
     723             :       INTEGER                                            :: ii, pos
     724             : 
     725           0 :       pos = dist%nelements + 1
     726           0 :       DO ii = 1, dist%nelements
     727           0 :          IF (element%id == dist%list(ii)%id) THEN
     728           0 :             pos = ii
     729           0 :             EXIT
     730             :          END IF
     731             :       END DO
     732           0 :       CALL fb_distribution_remove_ind(dist, pos)
     733           0 :    END SUBROUTINE fb_distribution_remove_el
     734             : 
     735             : ! **************************************************************************************************
     736             : !> \brief Move the pos-th element from a distribution to another
     737             : !> \param dist_from   : the source distribution
     738             : !> \param pos         : index of the element in the source distribution
     739             : !> \param dist_to     : the destination distribution
     740             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     741             : ! **************************************************************************************************
     742           0 :    SUBROUTINE fb_distribution_move_ind(dist_from, pos, dist_to)
     743             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
     744             :       INTEGER, INTENT(IN)                                :: pos
     745             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
     746             : 
     747             :       LOGICAL                                            :: check_ok
     748             :       TYPE(fb_distribution_element)                      :: element
     749             : 
     750           0 :       check_ok = ASSOCIATED(dist_from%list)
     751           0 :       CPASSERT(check_ok)
     752           0 :       check_ok = pos .LE. dist_from%nelements
     753           0 :       CPASSERT(check_ok)
     754           0 :       element = dist_from%list(pos)
     755           0 :       CALL fb_distribution_add(dist_to, element)
     756           0 :       CALL fb_distribution_remove(dist_from, pos)
     757           0 :    END SUBROUTINE fb_distribution_move_ind
     758             : 
     759             : ! **************************************************************************************************
     760             : !> \brief Move a given element from a distribution to another
     761             : !> \param dist_from   : the source distribution
     762             : !> \param element     : the element in question
     763             : !> \param dist_to     : the destination distribution
     764             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     765             : ! **************************************************************************************************
     766           0 :    SUBROUTINE fb_distribution_move_el(dist_from, element, dist_to)
     767             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
     768             :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     769             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
     770             : 
     771             :       LOGICAL                                            :: check_ok
     772             : 
     773           0 :       check_ok = ASSOCIATED(dist_from%list)
     774           0 :       CPASSERT(check_ok)
     775           0 :       CALL fb_distribution_add(dist_to, element)
     776           0 :       CALL fb_distribution_remove(dist_from, element)
     777           0 :    END SUBROUTINE fb_distribution_move_el
     778             : 
     779             : ! **************************************************************************************************
     780             : !> \brief Release a distribution
     781             : !> \param dist  : the distribution in question
     782             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     783             : ! **************************************************************************************************
     784          20 :    SUBROUTINE fb_distribution_release(dist)
     785             :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     786             : 
     787          20 :       IF (ASSOCIATED(dist%list)) THEN
     788          20 :          DEALLOCATE (dist%list)
     789             :       END IF
     790          20 :    END SUBROUTINE fb_distribution_release
     791             : 
     792           0 : END MODULE qs_fb_distribution_methods

Generated by: LCOV version 1.15