LCOV - code coverage report
Current view: top level - src - qs_fb_com_tasks_types.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:2fce0f8) Lines: 362 386 93.8 %
Date: 2024-12-21 06:28:57 Functions: 21 29 72.4 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : MODULE qs_fb_com_tasks_types
       9             : 
      10             :    USE cp_dbcsr_api,                    ONLY: dbcsr_get_block_p,&
      11             :                                               dbcsr_get_info,&
      12             :                                               dbcsr_put_block,&
      13             :                                               dbcsr_type
      14             :    USE kinds,                           ONLY: dp,&
      15             :                                               int_4,&
      16             :                                               int_8
      17             :    USE memory_utilities,                ONLY: reallocate
      18             :    USE message_passing,                 ONLY: mp_para_env_type
      19             :    USE qs_fb_matrix_data_types,         ONLY: fb_matrix_data_add,&
      20             :                                               fb_matrix_data_get,&
      21             :                                               fb_matrix_data_has_data,&
      22             :                                               fb_matrix_data_obj
      23             :    USE util,                            ONLY: sort
      24             : #include "./base/base_uses.f90"
      25             : 
      26             :    IMPLICIT NONE
      27             : 
      28             :    PRIVATE
      29             : 
      30             : ! public parameters:
      31             :    PUBLIC :: TASK_N_RECORDS, &
      32             :              TASK_DEST, &
      33             :              TASK_SRC, &
      34             :              TASK_PAIR, &
      35             :              TASK_COST
      36             : 
      37             : ! public types
      38             :    PUBLIC :: fb_com_tasks_obj, &
      39             :              fb_com_atom_pairs_obj
      40             : 
      41             : ! public methods
      42             : !API
      43             :    PUBLIC :: fb_com_tasks_release, &
      44             :              fb_com_tasks_nullify, &
      45             :              fb_com_tasks_create, &
      46             :              fb_com_tasks_get, &
      47             :              fb_com_tasks_set, &
      48             :              fb_com_tasks_transpose_dest_src, &
      49             :              fb_com_tasks_build_atom_pairs, &
      50             :              fb_com_tasks_encode_pair, &
      51             :              fb_com_tasks_decode_pair, &
      52             :              fb_com_atom_pairs_release, &
      53             :              fb_com_atom_pairs_nullify, &
      54             :              fb_com_atom_pairs_has_data, &
      55             :              fb_com_atom_pairs_create, &
      56             :              fb_com_atom_pairs_init, &
      57             :              fb_com_atom_pairs_get, &
      58             :              fb_com_atom_pairs_decode, &
      59             :              fb_com_atom_pairs_calc_buffer_sizes, &
      60             :              fb_com_atom_pairs_gather_blks, &
      61             :              fb_com_atom_pairs_distribute_blks
      62             : 
      63             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_com_tasks_types'
      64             : 
      65             : ! **********************************************************************
      66             : ! explanation on format of task lists (same for tasks_recv and tasks_send):
      67             : ! tasks_recv has dimension (4, ntasks_recv), and stores information on
      68             : ! the block to be copied or transferred
      69             : ! - tasks_recv(TASK_DEST,itask) = destination MPI rank of itask-th task
      70             : ! - tasks_recv(TASK_SRC,itask) = source MPI rank of itask-th task
      71             : ! - tasks_recv(TASK_PAIR,itask) = compressed pair indices of the block of itask-th task
      72             : ! - tasks_recv(TASK_COST,itask) = the cost of itask-th task
      73             : !
      74             : ! number of record slots in each task in the task lists
      75             :    INTEGER, PARAMETER :: TASK_N_RECORDS = 4
      76             : ! the indices for the records (1:TASK_DIM) in a task
      77             :    INTEGER, PARAMETER :: TASK_DEST = 1, &
      78             :                          TASK_SRC = 2, &
      79             :                          TASK_PAIR = 3, &
      80             :                          TASK_COST = 4
      81             : ! **********************************************************************
      82             : 
      83             : ! **********************************************************************
      84             : !> \brief data content for communication tasks used for send and receive
      85             : !>        matrix blocks
      86             : !> \param tasks     : the list of communication tasks, which is
      87             : !>                    represented by a 2D array, first dim stores
      88             : !>                    info for the communication: src and desc procs
      89             : !>                    and the atomic pair indexing the matrix block
      90             : !>                    to be communicated, etc.
      91             : !> \param task_dim  : the size of the first dimension of tasks
      92             : !> \param ntasks    : total number of local tasks
      93             : !> \param nencode   : the total number of atoms used for encoding
      94             : !>                    the block coordinates (iatom, jatom)
      95             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      96             : ! **********************************************************************
      97             :    TYPE fb_com_tasks_data
      98             :       ! use pure integer arrays to facilitate easier MPI coms
      99             :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER :: tasks => NULL()
     100             :       INTEGER :: task_dim = -1
     101             :       INTEGER :: ntasks = -1
     102             :       INTEGER :: nencode = -1
     103             :    END TYPE fb_com_tasks_data
     104             : 
     105             : !**********************************************************************
     106             : !> \brief defines a fb_com_tasks object
     107             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     108             : !**********************************************************************
     109             :    TYPE fb_com_tasks_obj
     110             :       TYPE(fb_com_tasks_data), POINTER, PRIVATE :: obj => NULL()
     111             :    END TYPE fb_com_tasks_obj
     112             : 
     113             : ! **********************************************************************
     114             : !> \brief data content for the list of block coordinates with the
     115             : !>        associated src/dest proc id for communication. These will be
     116             : !>        generated from the fb_com_tasks object
     117             : !> \param pairs         : the list of communication tasks, which is
     118             : !>                        represented by a 2D array, first dim stores
     119             : !>                        info for the communication: src and desc procs
     120             : !>                        and the atomic pair indexing the matrix block
     121             : !>                        to be communicated, etc.
     122             : !> \param npairs        : number of blks to be communicated in the atom
     123             : !>                        pair list
     124             : !> \param natoms_encode : the total number of atoms used for encoding
     125             : !>                        the proc + block coordinates (pe, iatom, jatom)
     126             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     127             : ! **********************************************************************
     128             :    TYPE fb_com_atom_pairs_data
     129             :       INTEGER(KIND=int_8), DIMENSION(:), POINTER :: pairs => NULL()
     130             :       INTEGER :: npairs = -1
     131             :       INTEGER :: natoms_encode = -1
     132             :    END TYPE fb_com_atom_pairs_data
     133             : 
     134             : ! **********************************************************************
     135             : !> \brief defines a fb_com_atom_pairs object
     136             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     137             : ! **********************************************************************
     138             :    TYPE fb_com_atom_pairs_obj
     139             :       TYPE(fb_com_atom_pairs_data), POINTER, PRIVATE :: obj => NULL()
     140             :    END TYPE fb_com_atom_pairs_obj
     141             : 
     142             : CONTAINS
     143             : 
     144             : ! **********************************************************************
     145             : !> \brief Releases an fb_com_tasks object
     146             : !> \param com_tasks the fb_com_tasks object, its content must not be
     147             : !>                   UNDEFINED, and the subroutine does nothing if the
     148             : !>                   content points to NULL
     149             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     150             : ! **************************************************************************************************
     151        1632 :    SUBROUTINE fb_com_tasks_release(com_tasks)
     152             :       TYPE(fb_com_tasks_obj), INTENT(INOUT)              :: com_tasks
     153             : 
     154        1632 :       IF (ASSOCIATED(com_tasks%obj)) THEN
     155        1632 :          IF (ASSOCIATED(com_tasks%obj%tasks)) THEN
     156        1632 :             DEALLOCATE (com_tasks%obj%tasks)
     157             :          END IF
     158        1632 :          DEALLOCATE (com_tasks%obj)
     159             :       ELSE
     160           0 :          NULLIFY (com_tasks%obj)
     161             :       END IF
     162        1632 :    END SUBROUTINE fb_com_tasks_release
     163             : 
     164             : ! **********************************************************************
     165             : !> \brief Releases an fb_com_atom_pairs object
     166             : !> \param atom_pairs the fb_com_atom_pairs object, its content must not
     167             : !>                    be UNDEFINED, and the subroutine does nothing if
     168             : !>                    the content points to NULL
     169             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     170             : ! **************************************************************************************************
     171        1600 :    SUBROUTINE fb_com_atom_pairs_release(atom_pairs)
     172             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs
     173             : 
     174        1600 :       IF (ASSOCIATED(atom_pairs%obj)) THEN
     175        1600 :          IF (ASSOCIATED(atom_pairs%obj%pairs)) THEN
     176        1600 :             DEALLOCATE (atom_pairs%obj%pairs)
     177             :          END IF
     178        1600 :          DEALLOCATE (atom_pairs%obj)
     179             :       ELSE
     180           0 :          NULLIFY (atom_pairs%obj)
     181             :       END IF
     182        1600 :    END SUBROUTINE fb_com_atom_pairs_release
     183             : 
     184             : ! **********************************************************************
     185             : !> \brief Nullifies a fb_com_tasks object, note that it does not release
     186             : !>        the original object. This procedure is used to nullify the
     187             : !>        pointer contained in the object which is used to associate to
     188             : !>        the actual object content
     189             : !> \param com_tasks the com_tasks object
     190             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     191             : ! **************************************************************************************************
     192        1632 :    SUBROUTINE fb_com_tasks_nullify(com_tasks)
     193             :       TYPE(fb_com_tasks_obj), INTENT(INOUT)              :: com_tasks
     194             : 
     195        1632 :       NULLIFY (com_tasks%obj)
     196        1632 :    END SUBROUTINE fb_com_tasks_nullify
     197             : 
     198             : ! **********************************************************************
     199             : !> \brief Nullifies a fb_com_atom_pairs object, note that it does not
     200             : !>        release the original object. This procedure is used to nullify
     201             : !>        the pointer contained in the object which is used to associate
     202             : !>        to the actual object content
     203             : !> \param atom_pairs the fb_com_atom_pairs object
     204             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     205             : ! **************************************************************************************************
     206        1600 :    SUBROUTINE fb_com_atom_pairs_nullify(atom_pairs)
     207             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs
     208             : 
     209        1600 :       NULLIFY (atom_pairs%obj)
     210        1600 :    END SUBROUTINE fb_com_atom_pairs_nullify
     211             : 
     212             : ! **********************************************************************
     213             : !> \brief Associates one fb_com_tasks object to another
     214             : !> \param a the fb_com_tasks object to be associated
     215             : !> \param b the fb_com_tasks object that a is to be associated to
     216             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     217             : ! **************************************************************************************************
     218           0 :    SUBROUTINE fb_com_tasks_associate(a, b)
     219             :       TYPE(fb_com_tasks_obj), INTENT(OUT)                :: a
     220             :       TYPE(fb_com_tasks_obj), INTENT(IN)                 :: b
     221             : 
     222           0 :       a%obj => b%obj
     223           0 :    END SUBROUTINE fb_com_tasks_associate
     224             : 
     225             : ! **********************************************************************
     226             : !> \brief Associates one fb_com_atom_pairs object to another
     227             : !> \param a the fb_com_atom_pairs object to be associated
     228             : !> \param b the fb_com_atom_pairs object that a is to be associated to
     229             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     230             : ! **************************************************************************************************
     231           0 :    SUBROUTINE fb_com_atom_pairs_associate(a, b)
     232             :       TYPE(fb_com_atom_pairs_obj), INTENT(OUT)           :: a
     233             :       TYPE(fb_com_atom_pairs_obj), INTENT(IN)            :: b
     234             : 
     235           0 :       a%obj => b%obj
     236           0 :    END SUBROUTINE fb_com_atom_pairs_associate
     237             : 
     238             : ! **********************************************************************
     239             : !> \brief Checks if a fb_com_tasks object is associated with an actual
     240             : !>        data content or not
     241             : !> \param com_tasks the fb_com_tasks object
     242             : !> \return ...
     243             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     244             : ! **************************************************************************************************
     245           0 :    FUNCTION fb_com_tasks_has_data(com_tasks) RESULT(res)
     246             :       TYPE(fb_com_tasks_obj), INTENT(IN)                 :: com_tasks
     247             :       LOGICAL                                            :: res
     248             : 
     249           0 :       res = ASSOCIATED(com_tasks%obj)
     250           0 :    END FUNCTION fb_com_tasks_has_data
     251             : 
     252             : ! **********************************************************************
     253             : !> \brief Checks if a fb_com_atom_pairs object is associated with an actual
     254             : !>        data content or not
     255             : !> \param atom_pairs the fb_com_atom_pairs object
     256             : !> \return ...
     257             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     258             : ! **************************************************************************************************
     259        4992 :    FUNCTION fb_com_atom_pairs_has_data(atom_pairs) RESULT(res)
     260             :       TYPE(fb_com_atom_pairs_obj), INTENT(IN)            :: atom_pairs
     261             :       LOGICAL                                            :: res
     262             : 
     263        4992 :       res = ASSOCIATED(atom_pairs%obj)
     264        4992 :    END FUNCTION fb_com_atom_pairs_has_data
     265             : 
     266             : ! **********************************************************************
     267             : !> \brief Creates and initialises an empty fb_com_tasks object
     268             : !> \param com_tasks the fb_com_tasks object, its content must be NULL
     269             : !>                   and cannot be UNDEFINED
     270             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     271             : ! **************************************************************************************************
     272        1632 :    SUBROUTINE fb_com_tasks_create(com_tasks)
     273             :       TYPE(fb_com_tasks_obj), INTENT(INOUT)              :: com_tasks
     274             : 
     275        1632 :       CPASSERT(.NOT. ASSOCIATED(com_tasks%obj))
     276        1632 :       ALLOCATE (com_tasks%obj)
     277        1632 :       com_tasks%obj%task_dim = TASK_N_RECORDS
     278        1632 :       com_tasks%obj%ntasks = 0
     279        1632 :       com_tasks%obj%nencode = 0
     280             :       NULLIFY (com_tasks%obj%tasks)
     281        1632 :    END SUBROUTINE fb_com_tasks_create
     282             : 
     283             : ! **********************************************************************
     284             : !> \brief Creates and initialises an empty fb_com_atom_pairs object
     285             : !> \param atom_pairs the fb_com_atom_pairs object, its content must be
     286             : !>                    NULL and cannot be UNDEFINED
     287             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     288             : ! **************************************************************************************************
     289        1600 :    SUBROUTINE fb_com_atom_pairs_create(atom_pairs)
     290             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs
     291             : 
     292        1600 :       CPASSERT(.NOT. ASSOCIATED(atom_pairs%obj))
     293        1600 :       ALLOCATE (atom_pairs%obj)
     294        1600 :       atom_pairs%obj%npairs = 0
     295        1600 :       atom_pairs%obj%natoms_encode = 0
     296             :       NULLIFY (atom_pairs%obj%pairs)
     297        1600 :    END SUBROUTINE fb_com_atom_pairs_create
     298             : 
     299             : ! **********************************************************************
     300             : !> \brief Initialises an fb_com_tasks object, and makes it empty
     301             : !> \param com_tasks the fb_com_tasks object, its content must not be
     302             : !>                   NULL or UNDEFINED
     303             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     304             : ! **************************************************************************************************
     305           0 :    SUBROUTINE fb_com_tasks_init(com_tasks)
     306             :       TYPE(fb_com_tasks_obj), INTENT(INOUT)              :: com_tasks
     307             : 
     308           0 :       CPASSERT(ASSOCIATED(com_tasks%obj))
     309           0 :       IF (ASSOCIATED(com_tasks%obj%tasks)) THEN
     310           0 :          DEALLOCATE (com_tasks%obj%tasks)
     311             :       END IF
     312           0 :       com_tasks%obj%task_dim = TASK_N_RECORDS
     313           0 :       com_tasks%obj%ntasks = 0
     314           0 :       com_tasks%obj%nencode = 0
     315           0 :    END SUBROUTINE fb_com_tasks_init
     316             : 
     317             : ! **********************************************************************
     318             : !> \brief Initialises an fb_com_atom_pairs object, and makes it empty
     319             : !> \param atom_pairs the fb_com_atom_pairs object, its content must not
     320             : !>                    be NULL or UNDEFINED
     321             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     322             : ! **************************************************************************************************
     323        3264 :    SUBROUTINE fb_com_atom_pairs_init(atom_pairs)
     324             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs
     325             : 
     326        3264 :       CPASSERT(ASSOCIATED(atom_pairs%obj))
     327        3264 :       IF (ASSOCIATED(atom_pairs%obj%pairs)) THEN
     328          32 :          DEALLOCATE (atom_pairs%obj%pairs)
     329             :       END IF
     330        3264 :       atom_pairs%obj%npairs = 0
     331        3264 :       atom_pairs%obj%natoms_encode = 0
     332        3264 :    END SUBROUTINE fb_com_atom_pairs_init
     333             : 
     334             : ! **********************************************************************
     335             : !> \brief Gets attributes from a fb_com_tasks object, one should only
     336             : !>        access the data content in a fb_com_tasks object outside this
     337             : !>        module via this procedure.
     338             : !> \param com_tasks the fb_com_tasks object, its content must not be
     339             : !>                   NULL or UNDEFINED
     340             : !> \param task_dim [OPTIONAL]: if present, outputs com_tasks%obj%task_dim
     341             : !> \param ntasks [OPTIONAL]: if present, outputs com_tasks%obj%ntasks
     342             : !> \param nencode [OPTIONAL]: if present, outputs com_tasks%obj%nencode
     343             : !> \param tasks [OPTIONAL]: if present, outputs pointer com_tasks%obj%tasks
     344             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     345             : ! **************************************************************************************************
     346        3536 :    SUBROUTINE fb_com_tasks_get(com_tasks, &
     347             :                                task_dim, &
     348             :                                ntasks, &
     349             :                                nencode, &
     350             :                                tasks)
     351             :       TYPE(fb_com_tasks_obj), INTENT(IN)                 :: com_tasks
     352             :       INTEGER, INTENT(OUT), OPTIONAL                     :: task_dim, ntasks, nencode
     353             :       INTEGER(KIND=int_8), DIMENSION(:, :), OPTIONAL, &
     354             :          POINTER                                         :: tasks
     355             : 
     356        3536 :       CPASSERT(ASSOCIATED(com_tasks%obj))
     357        3536 :       IF (PRESENT(task_dim)) task_dim = com_tasks%obj%task_dim
     358        3536 :       IF (PRESENT(ntasks)) ntasks = com_tasks%obj%ntasks
     359        3536 :       IF (PRESENT(nencode)) nencode = com_tasks%obj%nencode
     360        3536 :       IF (PRESENT(tasks)) tasks => com_tasks%obj%tasks
     361        3536 :    END SUBROUTINE fb_com_tasks_get
     362             : 
     363             : ! **********************************************************************
     364             : !> \brief Gets attributes from a fb_com_atom_pairs object, one should
     365             : !>        only access the data content in a fb_com_atom_pairs object
     366             : !>        outside this module via this procedure.
     367             : !> \param atom_pairs the fb_com_atom_pairs object, its content must not
     368             : !>                    be NULL or UNDEFINED
     369             : !> \param npairs [OPTIONAL]: if present, outputs atom_pairs%obj%npairs
     370             : !> \param natoms_encode [OPTIONAL]: if present, outputs atom_pairs%obj%natoms_encode
     371             : !> \param pairs [OPTIONAL]: if present, outputs pointer atom_pairs%obj%pairs
     372             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     373             : ! **************************************************************************************************
     374        3296 :    SUBROUTINE fb_com_atom_pairs_get(atom_pairs, &
     375             :                                     npairs, &
     376             :                                     natoms_encode, &
     377             :                                     pairs)
     378             :       TYPE(fb_com_atom_pairs_obj), INTENT(IN)            :: atom_pairs
     379             :       INTEGER, INTENT(OUT), OPTIONAL                     :: npairs, natoms_encode
     380             :       INTEGER(KIND=int_8), DIMENSION(:), OPTIONAL, &
     381             :          POINTER                                         :: pairs
     382             : 
     383        3296 :       CPASSERT(ASSOCIATED(atom_pairs%obj))
     384        3296 :       IF (PRESENT(npairs)) npairs = atom_pairs%obj%npairs
     385        3296 :       IF (PRESENT(natoms_encode)) natoms_encode = atom_pairs%obj%natoms_encode
     386        3296 :       IF (PRESENT(pairs)) pairs => atom_pairs%obj%pairs
     387        3296 :    END SUBROUTINE fb_com_atom_pairs_get
     388             : 
     389             : ! **********************************************************************
     390             : !> \brief Sets attributes in a fb_com_tasks object, one should only
     391             : !>        access the data content in a fb_com_tasks object outside this
     392             : !>        module via this procedure.
     393             : !> \param com_tasks the fb_com_tasks object, its content must not be
     394             : !>                   NULL or UNDEFINED
     395             : !> \param task_dim [OPTIONAL]: if present, sets com_tasks%obj%task_dim
     396             : !> \param ntasks [OPTIONAL]: if present, sets com_tasks%obj%ntasks
     397             : !> \param nencode [OPTIONAL]: if present, sets com_tasks%obj%nencode
     398             : !> \param tasks [OPTIONAL]: if present, associates pointer com_tasks%obj%tasks
     399             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     400             : ! **************************************************************************************************
     401        2720 :    SUBROUTINE fb_com_tasks_set(com_tasks, &
     402             :                                task_dim, &
     403             :                                ntasks, &
     404             :                                nencode, &
     405             :                                tasks)
     406             :       TYPE(fb_com_tasks_obj), INTENT(INOUT)              :: com_tasks
     407             :       INTEGER, INTENT(IN), OPTIONAL                      :: task_dim, ntasks, nencode
     408             :       INTEGER(KIND=int_8), DIMENSION(:, :), OPTIONAL, &
     409             :          POINTER                                         :: tasks
     410             : 
     411        2720 :       CPASSERT(ASSOCIATED(com_tasks%obj))
     412        2720 :       IF (PRESENT(task_dim)) com_tasks%obj%task_dim = task_dim
     413        2720 :       IF (PRESENT(ntasks)) com_tasks%obj%ntasks = ntasks
     414        2720 :       IF (PRESENT(nencode)) com_tasks%obj%nencode = nencode
     415        2720 :       IF (PRESENT(tasks)) THEN
     416        2176 :          IF (ASSOCIATED(com_tasks%obj%tasks)) THEN
     417         544 :             DEALLOCATE (com_tasks%obj%tasks)
     418             :          END IF
     419        2176 :          com_tasks%obj%tasks => tasks
     420             :       END IF
     421        2720 :    END SUBROUTINE fb_com_tasks_set
     422             : 
     423             : ! **********************************************************************
     424             : !> \brief Sets attributes in a fb_com_atom_pairs object, one should only
     425             : !>        access the data content in a fb_com_atom_pairs object outside
     426             : !>        this module via this procedure.
     427             : !> \param atom_pairs the fb_com_atom_pairs object, its content must not
     428             : !>                    be NULL or UNDEFINED
     429             : !> \param npairs [OPTIONAL]: if present, sets atom_pairs%obj%npairs
     430             : !> \param natoms_encode [OPTIONAL]: if present, sets atom_pairs%obj%natoms_encode
     431             : !> \param pairs [OPTIONAL]: if present, associates pointer atom_pairs%obj%pairs
     432             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     433             : ! **************************************************************************************************
     434        1632 :    SUBROUTINE fb_com_atom_pairs_set(atom_pairs, &
     435             :                                     npairs, &
     436             :                                     natoms_encode, &
     437             :                                     pairs)
     438             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs
     439             :       INTEGER, INTENT(IN), OPTIONAL                      :: npairs, natoms_encode
     440             :       INTEGER(KIND=int_8), DIMENSION(:), OPTIONAL, &
     441             :          POINTER                                         :: pairs
     442             : 
     443        1632 :       CPASSERT(ASSOCIATED(atom_pairs%obj))
     444        1632 :       IF (PRESENT(npairs)) atom_pairs%obj%npairs = npairs
     445        1632 :       IF (PRESENT(natoms_encode)) atom_pairs%obj%natoms_encode = natoms_encode
     446        1632 :       IF (PRESENT(pairs)) THEN
     447        1632 :          IF (ASSOCIATED(atom_pairs%obj%pairs)) THEN
     448           0 :             DEALLOCATE (atom_pairs%obj%pairs)
     449             :          END IF
     450        1632 :          atom_pairs%obj%pairs => pairs
     451             :       END IF
     452        1632 :    END SUBROUTINE fb_com_atom_pairs_set
     453             : 
     454             : ! **********************************************************************
     455             : !> \brief Start from a local set of tasks that has desc/src process equal
     456             : !>        to the local MPI rank, communicate with other processes so
     457             : !>        that a new local set of tasks is constructed with src/desc
     458             : !>        process equal to the local MPI rank
     459             : !> \param tasks_dest_is_me the local com_task object with all tasks
     460             : !>                          having the desc process id equal to my_id
     461             : !> \param direction direction of operation:
     462             : !>                   ">" means from tasks_dest_is_me construct tasks_src_is_me
     463             : !>                   "<" means from tasks_src_is_me construct tasks_dest_is_me
     464             : !> \param tasks_src_is_me the local com_task object with all tasks
     465             : !>                          having the src process id equal to my_id
     466             : !> \param para_env CP2K parallel environment object that stores MPI related
     467             : !>                  information of the current run
     468             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     469             : ! **************************************************************************************************
     470        1360 :    SUBROUTINE fb_com_tasks_transpose_dest_src(tasks_dest_is_me, &
     471             :                                               direction, &
     472             :                                               tasks_src_is_me, &
     473             :                                               para_env)
     474             :       TYPE(fb_com_tasks_obj), INTENT(INOUT)              :: tasks_dest_is_me
     475             :       CHARACTER, INTENT(IN)                              :: direction
     476             :       TYPE(fb_com_tasks_obj), INTENT(INOUT)              :: tasks_src_is_me
     477             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     478             : 
     479             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_com_tasks_transpose_dest_src'
     480             : 
     481             :       INTEGER                                            :: handle, ii, ind, ipe, itask, jj, &
     482             :                                                             nencode, ntasks_in, ntasks_out, rank, &
     483             :                                                             rank_pos, task_dim
     484        1360 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_in, tasks_out
     485        1360 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_buf, recv_disps, recv_sizes, &
     486        1360 :                                                             send_buf, send_disps, send_sizes
     487             : 
     488        1360 :       CALL timeset(routineN, handle)
     489             : 
     490        1360 :       NULLIFY (tasks_in, tasks_out)
     491             : 
     492        1360 :       IF (direction == "<") THEN
     493             :          CALL fb_com_tasks_get(com_tasks=tasks_src_is_me, &
     494             :                                task_dim=task_dim, &
     495             :                                ntasks=ntasks_in, &
     496             :                                tasks=tasks_in, &
     497         816 :                                nencode=nencode)
     498         816 :          rank_pos = TASK_DEST
     499             :       ELSE
     500             :          CALL fb_com_tasks_get(com_tasks=tasks_dest_is_me, &
     501             :                                task_dim=task_dim, &
     502             :                                ntasks=ntasks_in, &
     503             :                                tasks=tasks_in, &
     504         544 :                                nencode=nencode)
     505         544 :          rank_pos = TASK_SRC
     506             :       END IF
     507             : 
     508             :       ! allocate local arrays
     509        4080 :       ALLOCATE (send_sizes(para_env%num_pe))
     510        4080 :       ALLOCATE (send_disps(para_env%num_pe))
     511        4080 :       ALLOCATE (send_buf(para_env%num_pe))
     512             : 
     513        4080 :       ALLOCATE (recv_sizes(para_env%num_pe))
     514        4080 :       ALLOCATE (recv_disps(para_env%num_pe))
     515        4080 :       ALLOCATE (recv_buf(para_env%num_pe))
     516             : 
     517             :       ! first count how many local recv/send tasks need to be sent to
     518             :       ! other processes, and share this information with the other
     519             :       ! processes.  using send_buf as a temporary array for counting
     520        4080 :       send_buf = 0
     521             :       ! looping over local task list
     522       50000 :       DO itask = 1, ntasks_in
     523       48640 :          rank = INT(tasks_in(rank_pos, itask)) + 1
     524       50000 :          send_buf(rank) = send_buf(rank) + 1
     525             :       END DO
     526             : 
     527        1360 :       CALL para_env%alltoall(send_buf, recv_buf, 1)
     528             : 
     529             :       ! now that we know how many recv/send tasks to send, pack the
     530             :       ! tasks, and send them around, so that the recv/send tasks are
     531             :       ! sent to the correct src/dest processes, and these then are
     532             :       ! collected into the send/recv tasks list on each of the src/dest
     533             :       ! processes
     534             : 
     535        4080 :       send_sizes = 0
     536        4080 :       send_disps = 0
     537        4080 :       recv_sizes = 0
     538        4080 :       recv_disps = 0
     539             : 
     540             :       ! work out the sizes of send and recv buffers and allocate them
     541        1360 :       send_sizes(1) = send_buf(1)*task_dim
     542        1360 :       recv_sizes(1) = recv_buf(1)*task_dim
     543        2720 :       DO ipe = 2, para_env%num_pe
     544        1360 :          send_sizes(ipe) = send_buf(ipe)*task_dim
     545        1360 :          send_disps(ipe) = send_disps(ipe - 1) + send_sizes(ipe - 1)
     546        1360 :          recv_sizes(ipe) = recv_buf(ipe)*task_dim
     547        2720 :          recv_disps(ipe) = recv_disps(ipe - 1) + recv_sizes(ipe - 1)
     548             :       END DO
     549             : 
     550             :       ! reallocate send and recv buffers to the correct sizes for
     551             :       ! transferring the actual tasks
     552        1360 :       DEALLOCATE (send_buf)
     553        1360 :       DEALLOCATE (recv_buf)
     554        6800 :       ALLOCATE (send_buf(SUM(send_sizes)))
     555        6800 :       ALLOCATE (recv_buf(SUM(recv_sizes)))
     556             : 
     557             :       ! now that the send buffer is of correct size, do packing
     558             :       ! send_buf and recv_buf may be zero sized
     559      197280 :       IF (SIZE(send_buf) > 0) send_buf = 0
     560      197280 :       IF (SIZE(recv_buf) > 0) recv_buf = 0
     561        4080 :       send_sizes = 0
     562       50000 :       DO itask = 1, ntasks_in
     563       48640 :          rank = INT(tasks_in(rank_pos, itask)) + 1
     564      243200 :          DO ii = 1, task_dim
     565      194560 :             ind = send_disps(rank) + send_sizes(rank) + ii
     566      243200 :             send_buf(ind) = INT(tasks_in(ii, itask))
     567             :          END DO
     568       50000 :          send_sizes(rank) = send_sizes(rank) + task_dim
     569             :       END DO
     570             :       ! do communication
     571             :       CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
     572        1360 :                              recv_buf, recv_sizes, recv_disps)
     573             : 
     574             :       ! deallocate send buffers
     575        1360 :       DEALLOCATE (send_buf)
     576        1360 :       DEALLOCATE (send_sizes)
     577        1360 :       DEALLOCATE (send_disps)
     578             : 
     579             :       ! allocate the output task list
     580        4080 :       ntasks_out = SUM(recv_sizes)/task_dim
     581             :       ! this will not be deallocated in this subroutine
     582        5440 :       ALLOCATE (tasks_out(task_dim, ntasks_out))
     583             : 
     584             :       ! do unpacking
     585        1360 :       itask = 0
     586        4080 :       DO ipe = 1, para_env%num_pe
     587       52720 :          DO ii = 0, recv_sizes(ipe)/task_dim - 1
     588       48640 :             itask = itask + 1
     589      245920 :             DO jj = 1, task_dim
     590      194560 :                ind = recv_disps(ipe) + ii*task_dim + jj
     591      243200 :                tasks_out(jj, itask) = recv_buf(ind)
     592             :             END DO
     593             :          END DO
     594             :       END DO
     595             : 
     596             :       ! set output tasks
     597        1360 :       IF (direction == "<") THEN
     598             :          CALL fb_com_tasks_set(com_tasks=tasks_dest_is_me, &
     599             :                                task_dim=task_dim, &
     600             :                                ntasks=ntasks_out, &
     601             :                                tasks=tasks_out, &
     602         816 :                                nencode=nencode)
     603             :       ELSE
     604             :          CALL fb_com_tasks_set(com_tasks=tasks_src_is_me, &
     605             :                                task_dim=task_dim, &
     606             :                                ntasks=ntasks_out, &
     607             :                                tasks=tasks_out, &
     608         544 :                                nencode=nencode)
     609             :       END IF
     610             : 
     611             :       ! deallocate recv buffers
     612        1360 :       DEALLOCATE (recv_buf)
     613        1360 :       DEALLOCATE (recv_sizes)
     614        1360 :       DEALLOCATE (recv_disps)
     615             : 
     616        1360 :       CALL timestop(handle)
     617             : 
     618        1360 :    END SUBROUTINE fb_com_tasks_transpose_dest_src
     619             : 
     620             : ! **********************************************************************
     621             : !> \brief Generate send or receive atom_pair lists from a com_tasks
     622             : !>        object. atom_pair list is used as a condensed index for the
     623             : !>        local/remote matrix blocks to be sent/received.
     624             : !> \param com_tasks the com_tasks object
     625             : !> \param atom_pairs fb_com_atom_pairs_obj containing  list of encoded
     626             : !>                    atomic pair indices and the dest/src proc id for
     627             : !>                    the matrix block to be sent/received.
     628             : !> \param natoms_encode the total number of atoms the atomic pair indices
     629             : !>                       corresponds to, and it is used for encode the
     630             : !>                       atom_pairs values
     631             : !> \param send_or_recv whether the atom_pair to be generated is for
     632             : !>                      the local matrix blocks to be sent or the
     633             : !>                      remote matrix blocks to be received for this MPI
     634             : !>                      process
     635             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     636             : ! **************************************************************************************************
     637        1632 :    SUBROUTINE fb_com_tasks_build_atom_pairs(com_tasks, &
     638             :                                             atom_pairs, &
     639             :                                             natoms_encode, &
     640             :                                             send_or_recv)
     641             :       TYPE(fb_com_tasks_obj), INTENT(IN)                 :: com_tasks
     642             :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs
     643             :       INTEGER, INTENT(IN)                                :: natoms_encode
     644             :       CHARACTER(len=*), INTENT(IN)                       :: send_or_recv
     645             : 
     646             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_com_tasks_build_atom_pairs'
     647             : 
     648             :       INTEGER                                            :: handle, iatom, ii, itask, jatom, npairs, &
     649             :                                                             ntasks, rank, rank_pos
     650             :       INTEGER(KIND=int_8)                                :: pair
     651        1632 :       INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs
     652        1632 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks
     653        1632 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: tmp_index
     654             :       LOGICAL                                            :: check_ok
     655             : 
     656        1632 :       CALL timeset(routineN, handle)
     657             : 
     658        1632 :       NULLIFY (pairs, tasks)
     659             : 
     660        1632 :       check_ok = fb_com_atom_pairs_has_data(atom_pairs)
     661        1632 :       CPASSERT(check_ok)
     662             : 
     663             :       ! initialise atom_pairs
     664        1632 :       CALL fb_com_atom_pairs_init(atom_pairs)
     665             : 
     666        1632 :       IF (TRIM(send_or_recv) == "send") THEN
     667             :          rank_pos = TASK_DEST
     668             :       ELSE
     669         816 :          rank_pos = TASK_SRC
     670             :       END IF
     671             : 
     672             :       CALL fb_com_tasks_get(com_tasks=com_tasks, &
     673             :                             ntasks=ntasks, &
     674        1632 :                             tasks=tasks)
     675             : 
     676        4896 :       ALLOCATE (pairs(ntasks))
     677             :       ! we can have cases where ntasks == 0
     678       52832 :       IF (SIZE(pairs) > 0) pairs = 0
     679        1632 :       npairs = ntasks
     680             : 
     681       52832 :       DO itask = 1, ntasks
     682       51200 :          pair = tasks(TASK_PAIR, itask)
     683       51200 :          CALL fb_com_tasks_decode_pair(pair, iatom, jatom, natoms_encode)
     684       51200 :          rank = INT(tasks(rank_pos, itask))
     685             :          CALL fb_com_atom_pairs_encode(pairs(itask), &
     686       52832 :                                        rank, iatom, jatom, natoms_encode)
     687             :       END DO
     688             : 
     689             :       ! sort atom_pairs so that the pairs are ordered process blocks and
     690             :       ! that possible duplicates may be found (we don't want to send or
     691             :       ! receive same information to the same destination or source more
     692             :       ! than once)
     693        1632 :       IF (npairs > 0) THEN
     694        4896 :          ALLOCATE (tmp_index(npairs))
     695             :          ! only sort the actual pairs recorded in the send list
     696        1632 :          CALL sort(pairs, npairs, tmp_index)
     697        1632 :          DEALLOCATE (tmp_index)
     698             :       END IF
     699             : 
     700             :       ! remove duplicates
     701        1632 :       IF (npairs > 1) THEN
     702        1632 :          npairs = 1
     703             :          ! first atom pair must be allowed
     704       51200 :          DO ii = 2, ntasks
     705       51200 :             IF (pairs(ii) > pairs(ii - 1)) THEN
     706       42656 :                npairs = npairs + 1
     707       42656 :                pairs(npairs) = pairs(ii)
     708             :             END IF
     709             :          END DO
     710             :          ! reallocate the pairs list
     711        1632 :          CALL reallocate(pairs, 1, npairs)
     712             :       END IF
     713             : 
     714             :       CALL fb_com_atom_pairs_set(atom_pairs=atom_pairs, &
     715             :                                  pairs=pairs, &
     716             :                                  npairs=npairs, &
     717        1632 :                                  natoms_encode=natoms_encode)
     718             : 
     719        1632 :       CALL timestop(handle)
     720             : 
     721        3264 :    END SUBROUTINE fb_com_tasks_build_atom_pairs
     722             : 
     723             : ! **********************************************************************
     724             : !> \brief Encodes (iatom, jatom) pair index of a block into a single
     725             : !>        integer
     726             : !> \param ind encoded integer
     727             : !> \param iatom the first index of the (iatom, jatom) block index
     728             : !> \param jatom the second index of the (iatom, jatom) block index
     729             : !> \param natoms the total number of atoms iatom and jatom indexes
     730             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     731             : ! **************************************************************************************************
     732       76800 :    SUBROUTINE fb_com_tasks_encode_pair(ind, iatom, jatom, natoms)
     733             :       INTEGER(KIND=int_8), INTENT(OUT)                   :: ind
     734             :       INTEGER, INTENT(IN)                                :: iatom, jatom, natoms
     735             : 
     736             :       INTEGER(KIND=int_8)                                :: iatom8, jatom8, natoms8
     737             : 
     738       76800 :       natoms8 = INT(natoms, int_8)
     739       76800 :       iatom8 = INT(iatom, int_8)
     740       76800 :       jatom8 = INT(jatom, int_8)
     741             : 
     742       76800 :       ind = (iatom8 - 1_int_8)*natoms8 + (jatom8 - 1_int_8)
     743       76800 :    END SUBROUTINE fb_com_tasks_encode_pair
     744             : 
     745             : ! **********************************************************************
     746             : !> \brief Dncodes a single integer into (iatom, jatom) pair index of
     747             : !>        a block into a single
     748             : !> \param ind encoded integer
     749             : !> \param iatom the first index of the (iatom, jatom) block index
     750             : !> \param jatom the second index of the (iatom, jatom) block index
     751             : !> \param natoms the total number of atoms iatom and jatom indexes
     752             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     753             : ! **************************************************************************************************
     754      162816 :    SUBROUTINE fb_com_tasks_decode_pair(ind, iatom, jatom, natoms)
     755             :       INTEGER(KIND=int_8), INTENT(IN)                    :: ind
     756             :       INTEGER, INTENT(OUT)                               :: iatom, jatom
     757             :       INTEGER, INTENT(IN)                                :: natoms
     758             : 
     759             :       INTEGER(KIND=int_8)                                :: iatom8, jatom8, natoms8
     760             : 
     761      162816 :       natoms8 = INT(natoms, int_8)
     762      162816 :       iatom8 = ind/natoms8 + 1_int_8
     763      162816 :       jatom8 = MOD(ind, natoms8) + 1_int_8
     764      162816 :       iatom = INT(iatom8, int_4)
     765      162816 :       jatom = INT(jatom8, int_4)
     766      162816 :    END SUBROUTINE fb_com_tasks_decode_pair
     767             : 
     768             : ! **********************************************************************
     769             : !> \brief Encodes (rank, iatom, jatom) index of a communication task---to
     770             : !>         send/receive a block to/from a process---into a single integer
     771             : !> \param ind encoded integer
     772             : !> \param pe the rank of the process the block to be send to or receive
     773             : !>            from
     774             : !> \param iatom the first index of the (iatom, jatom) block index
     775             : !> \param jatom the second index of the (iatom, jatom) block index
     776             : !> \param natoms the total number of atoms iatom and jatom indexes
     777             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     778             : ! **************************************************************************************************
     779       51200 :    SUBROUTINE fb_com_atom_pairs_encode(ind, pe, iatom, jatom, natoms)
     780             :       INTEGER(KIND=int_8), INTENT(OUT)                   :: ind
     781             :       INTEGER, INTENT(IN)                                :: pe, iatom, jatom, natoms
     782             : 
     783             :       INTEGER(KIND=int_8)                                :: natoms8, pair
     784             : 
     785             : ! pe must start count from 0 (i.e same as MPI convension)
     786             : 
     787       51200 :       natoms8 = INT(natoms, int_8)
     788       51200 :       CALL fb_com_tasks_encode_pair(pair, iatom, jatom, natoms)
     789       51200 :       ind = INT(pe, int_8)*natoms8*natoms8 + pair
     790       51200 :    END SUBROUTINE fb_com_atom_pairs_encode
     791             : 
     792             : ! **********************************************************************
     793             : !> \brief Decodes a single integer into the (rank, iatom, jatom) index
     794             : !>        of a communication task to send/receive a block to/from a
     795             : !>        process
     796             : !> \param ind    : encoded integer
     797             : !> \param pe     : the rank of the process the block to be send to or receive
     798             : !>            from
     799             : !> \param iatom  : the first index of the (iatom, jatom) block index
     800             : !> \param jatom  : the second index of the (iatom, jatom) block index
     801             : !> \param natoms : the total number of atoms iatom and jatom indexes
     802             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     803             : ! **************************************************************************************************
     804       88576 :    SUBROUTINE fb_com_atom_pairs_decode(ind, pe, iatom, jatom, natoms)
     805             :       INTEGER(KIND=int_8), INTENT(IN)                    :: ind
     806             :       INTEGER, INTENT(OUT)                               :: pe, iatom, jatom
     807             :       INTEGER, INTENT(IN)                                :: natoms
     808             : 
     809             :       INTEGER(KIND=int_8)                                :: natoms8, pair
     810             : 
     811             : ! pe start count from 0 (i.e same as MPI convension)
     812             : 
     813       88576 :       natoms8 = INT(natoms, int_8)
     814       88576 :       pe = INT(ind/(natoms8*natoms8), int_4)
     815       88576 :       pair = MOD(ind, natoms8*natoms8)
     816       88576 :       CALL fb_com_tasks_decode_pair(pair, iatom, jatom, natoms)
     817       88576 :    END SUBROUTINE fb_com_atom_pairs_decode
     818             : 
     819             : ! **********************************************************************
     820             : !> \brief Calculate the MPI send or recv buffer sizes according to the
     821             : !>        communication pairs (atom_pairs) and DBCSR matrix data.
     822             : !>        Each atom_pair corresponds to one DBCSR matrix block that
     823             : !>        needs to be sent or recerived.
     824             : !> \param atom_pairs : the communication pair object for either sending
     825             : !>                     or receiving
     826             : !> \param nprocs : total number of MPI processes in communicator
     827             : !> \param row_blk_sizes : row_blk_sizes(iblkrow) = number of element rows
     828             : !>                        in each block in the iblkrow-th block row of
     829             : !>                        the DBCSR matrix
     830             : !> \param col_blk_sizes : col_blk_sizes(iblkcol) = number of element cols
     831             : !>                        in each block in the iblkcol-th block col of
     832             : !>                        the DBCSR matrix
     833             : !> \param sendrecv_sizes : size required for the send of recv buffer
     834             : !>                         for each dest/src process
     835             : !> \param sendrecv_disps : sendrecv_disps(ipe) + 1 = starting location
     836             : !>                         in send/recv buffer for data destined for
     837             : !>                         process ipe
     838             : !> \param sendrecv_pair_counts : sendrecv_pair_counts(ipe) = number of
     839             : !>                               pairs (blocks) to be sent to or recv
     840             : !>                               from process ipe
     841             : !> \param sendrecv_pair_disps send_recv_pair_disps(ipe) + 1 = start
     842             : !>                               location in atom_pairs array for
     843             : !>                               all the pairs to be sent to or recv
     844             : !>                               from process ipe
     845             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     846             : ! **************************************************************************************************
     847        3264 :    SUBROUTINE fb_com_atom_pairs_calc_buffer_sizes(atom_pairs, &
     848             :                                                   nprocs, &
     849        3264 :                                                   row_blk_sizes, &
     850        1632 :                                                   col_blk_sizes, &
     851        1632 :                                                   sendrecv_sizes, &
     852        1632 :                                                   sendrecv_disps, &
     853        1632 :                                                   sendrecv_pair_counts, &
     854        1632 :                                                   sendrecv_pair_disps)
     855             :       TYPE(fb_com_atom_pairs_obj), INTENT(IN)            :: atom_pairs
     856             :       INTEGER, INTENT(IN)                                :: nprocs
     857             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: row_blk_sizes, col_blk_sizes
     858             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: sendrecv_sizes, sendrecv_disps, &
     859             :                                                             sendrecv_pair_counts, &
     860             :                                                             sendrecv_pair_disps
     861             : 
     862             :       INTEGER                                            :: iatom, ipair, ipe, jatom, natoms_encode, &
     863             :                                                             ncols_blk, npairs, nrows_blk, pe
     864        1632 :       INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs
     865             :       LOGICAL                                            :: check_ok
     866             : 
     867        1632 :       NULLIFY (pairs)
     868             : 
     869             :       check_ok = SIZE(sendrecv_sizes) == nprocs .AND. &
     870             :                  SIZE(sendrecv_disps) == nprocs .AND. &
     871             :                  SIZE(sendrecv_pair_counts) == nprocs .AND. &
     872        1632 :                  SIZE(sendrecv_pair_disps) == nprocs
     873           0 :       CPASSERT(check_ok)
     874             : 
     875        1632 :       check_ok = fb_com_atom_pairs_has_data(atom_pairs)
     876        1632 :       CPASSERT(check_ok)
     877             : 
     878             :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs, &
     879             :                                  pairs=pairs, &
     880             :                                  npairs=npairs, &
     881        1632 :                                  natoms_encode=natoms_encode)
     882             : 
     883        4896 :       sendrecv_sizes = 0
     884        4896 :       sendrecv_pair_counts = 0
     885       45920 :       DO ipair = 1, npairs
     886             :          ! decode processor and (iatom, jatom) information
     887             :          CALL fb_com_atom_pairs_decode(pairs(ipair), &
     888       44288 :                                        pe, iatom, jatom, natoms_encode)
     889       44288 :          pe = pe + 1 ! we need proc to count from 1
     890       44288 :          nrows_blk = row_blk_sizes(iatom)
     891       44288 :          ncols_blk = col_blk_sizes(jatom)
     892       44288 :          sendrecv_sizes(pe) = sendrecv_sizes(pe) + nrows_blk*ncols_blk
     893       45920 :          sendrecv_pair_counts(pe) = sendrecv_pair_counts(pe) + 1
     894             :       END DO
     895             :       ! calculate displacements of the data of each destibation pe in
     896             :       ! send buffer and in the list of pairs to be sent
     897        4896 :       sendrecv_disps = 0
     898        4896 :       sendrecv_pair_disps = 0
     899        3264 :       DO ipe = 2, nprocs
     900        1632 :          sendrecv_disps(ipe) = sendrecv_disps(ipe - 1) + sendrecv_sizes(ipe - 1)
     901        3264 :          sendrecv_pair_disps(ipe) = sendrecv_pair_disps(ipe - 1) + sendrecv_pair_counts(ipe - 1)
     902             :       END DO
     903             : 
     904        1632 :    END SUBROUTINE fb_com_atom_pairs_calc_buffer_sizes
     905             : 
     906             : ! ****************************************************************************
     907             : !> \brief Given send and recv fb_com_atom_pair object, gather all the
     908             : !>        relevant DBCSR matrix blocks together, and add them to
     909             : !>        a fb_matrix_data object for storage
     910             : !> \param dbcsr_mat : the DBCSR matrix where the matrix blocks will be
     911             : !>                    obtained from
     912             : !> \param atom_pairs_send : prescription on exactly which DBCSR blocks
     913             : !>                          are to be sent to where
     914             : !> \param atom_pairs_recv : prescription on exactly which DBCSR blocks
     915             : !>                          are to be received from where
     916             : !> \param para_env        : CP2K parallel environment
     917             : !> \param matrix_storage  : the fb_matrix_data object to store the
     918             : !>                          received DBCSR matrix blocks
     919             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     920             : ! **************************************************************************************************
     921          32 :    SUBROUTINE fb_com_atom_pairs_gather_blks(dbcsr_mat, &
     922             :                                             atom_pairs_send, &
     923             :                                             atom_pairs_recv, &
     924             :                                             para_env, &
     925             :                                             matrix_storage)
     926             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     927             :       TYPE(fb_com_atom_pairs_obj), INTENT(IN)            :: atom_pairs_send, atom_pairs_recv
     928             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     929             :       TYPE(fb_matrix_data_obj), INTENT(INOUT)            :: matrix_storage
     930             : 
     931             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_com_atom_pairs_gather_blks'
     932             : 
     933             :       INTEGER :: handle, iatom, ii, ind, ipair, ipe, jatom, jj, ncols_blk, ncols_blk_max, &
     934             :          npairs_recv, npairs_send, nrows_blk, nrows_blk_max, numprocs, pe, recv_encode, send_encode
     935          32 :       INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs_recv, pairs_send
     936             :       INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disps, recv_pair_count, recv_pair_disps, &
     937             :          recv_sizes, send_disps, send_pair_count, send_pair_disps, send_sizes
     938          32 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, row_block_size_data
     939             :       LOGICAL                                            :: check_ok, found
     940             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_buf, send_buf
     941          32 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
     942             : 
     943          32 :       CALL timeset(routineN, handle)
     944             : 
     945          32 :       NULLIFY (pairs_send, pairs_recv, mat_block, &
     946          32 :                row_block_size_data, col_block_size_data)
     947             : 
     948          32 :       check_ok = fb_com_atom_pairs_has_data(atom_pairs_send)
     949          32 :       CPASSERT(check_ok)
     950          32 :       check_ok = fb_com_atom_pairs_has_data(atom_pairs_send)
     951          32 :       CPASSERT(check_ok)
     952          32 :       check_ok = fb_matrix_data_has_data(matrix_storage)
     953          32 :       CPASSERT(check_ok)
     954             : 
     955             :       ! get com pair informations
     956             :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_send, &
     957             :                                  pairs=pairs_send, &
     958             :                                  npairs=npairs_send, &
     959          32 :                                  natoms_encode=send_encode)
     960             :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_recv, &
     961             :                                  pairs=pairs_recv, &
     962             :                                  npairs=npairs_recv, &
     963          32 :                                  natoms_encode=recv_encode)
     964             :       ! get para_env info
     965          32 :       numprocs = para_env%num_pe
     966             : 
     967             :       ! get dbcsr row and col block sizes
     968          32 :       CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
     969             : 
     970             :       ! allocate temporary arrays for send
     971          96 :       ALLOCATE (send_sizes(numprocs))
     972          64 :       ALLOCATE (send_disps(numprocs))
     973          64 :       ALLOCATE (send_pair_count(numprocs))
     974          64 :       ALLOCATE (send_pair_disps(numprocs))
     975             : 
     976             :       ! setup send buffer sizes
     977             :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_send, &
     978             :                                                numprocs, &
     979             :                                                row_block_size_data, &
     980             :                                                col_block_size_data, &
     981             :                                                send_sizes, &
     982             :                                                send_disps, &
     983             :                                                send_pair_count, &
     984          32 :                                                send_pair_disps)
     985             : 
     986             :       ! allocate send buffer
     987         160 :       ALLOCATE (send_buf(SUM(send_sizes)))
     988             : 
     989             :       ! allocate temporary arrays for recv
     990          64 :       ALLOCATE (recv_sizes(numprocs))
     991          64 :       ALLOCATE (recv_disps(numprocs))
     992          64 :       ALLOCATE (recv_pair_count(numprocs))
     993          64 :       ALLOCATE (recv_pair_disps(numprocs))
     994             : 
     995             :       ! setup recv buffer sizes
     996             :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_recv, &
     997             :                                                numprocs, &
     998             :                                                row_block_size_data, &
     999             :                                                col_block_size_data, &
    1000             :                                                recv_sizes, &
    1001             :                                                recv_disps, &
    1002             :                                                recv_pair_count, &
    1003          32 :                                                recv_pair_disps)
    1004             : 
    1005             :       ! allocate recv buffer
    1006         160 :       ALLOCATE (recv_buf(SUM(recv_sizes)))
    1007             : 
    1008             :       ! do packing
    1009          96 :       DO ipe = 1, numprocs
    1010             :          ! need to reuse send_sizes as an accumulative displacement, so recalculate
    1011          64 :          send_sizes(ipe) = 0
    1012        1248 :          DO ipair = 1, send_pair_count(ipe)
    1013             :             CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
    1014        1152 :                                           pe, iatom, jatom, send_encode)
    1015        1152 :             nrows_blk = row_block_size_data(iatom)
    1016        1152 :             ncols_blk = col_block_size_data(jatom)
    1017             :             CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
    1018             :                                    row=iatom, col=jatom, block=mat_block, &
    1019        1152 :                                    found=found)
    1020        1216 :             IF (.NOT. found) THEN
    1021           0 :                CPABORT("Matrix block not found")
    1022             :             ELSE
    1023             :                ! we have found the matrix block
    1024       16128 :                DO jj = 1, ncols_blk
    1025      210816 :                   DO ii = 1, nrows_blk
    1026             :                      ! column major format in blocks
    1027      194688 :                      ind = send_disps(ipe) + send_sizes(ipe) + ii + (jj - 1)*nrows_blk
    1028      209664 :                      send_buf(ind) = mat_block(ii, jj)
    1029             :                   END DO ! ii
    1030             :                END DO ! jj
    1031        1152 :                send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
    1032             :             END IF
    1033             :          END DO ! ipair
    1034             :       END DO ! ipe
    1035             : 
    1036             :       ! do communication
    1037             :       CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
    1038          32 :                              recv_buf, recv_sizes, recv_disps)
    1039             : 
    1040             :       ! cleanup temporary arrays no longer needed
    1041          32 :       DEALLOCATE (send_buf)
    1042          32 :       DEALLOCATE (send_sizes)
    1043          32 :       DEALLOCATE (send_disps)
    1044          32 :       DEALLOCATE (send_pair_count)
    1045          32 :       DEALLOCATE (send_pair_disps)
    1046             : 
    1047             :       ! unpack into matrix_data object
    1048          32 :       NULLIFY (mat_block)
    1049         288 :       nrows_blk_max = MAXVAL(row_block_size_data)
    1050         288 :       ncols_blk_max = MAXVAL(col_block_size_data)
    1051         128 :       ALLOCATE (mat_block(nrows_blk_max, ncols_blk_max))
    1052          96 :       DO ipe = 1, numprocs
    1053          64 :          recv_sizes(ipe) = 0
    1054        1248 :          DO ipair = 1, recv_pair_count(ipe)
    1055             :             CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
    1056        1152 :                                           pe, iatom, jatom, recv_encode)
    1057        1152 :             nrows_blk = row_block_size_data(iatom)
    1058        1152 :             ncols_blk = col_block_size_data(jatom)
    1059             :             ! ALLOCATE(mat_block(nrows_blk,ncols_blk), STAT=stat)
    1060             :             ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
    1061      210816 :             mat_block(:, :) = 0.0_dp
    1062       16128 :             DO jj = 1, ncols_blk
    1063      210816 :                DO ii = 1, nrows_blk
    1064             :                   ! column major format in blocks
    1065      194688 :                   ind = recv_disps(ipe) + recv_sizes(ipe) + ii + (jj - 1)*nrows_blk
    1066      209664 :                   mat_block(ii, jj) = recv_buf(ind)
    1067             :                END DO ! ii
    1068             :             END DO ! jj
    1069             :             CALL fb_matrix_data_add(matrix_storage, &
    1070             :                                     iatom, jatom, &
    1071        1152 :                                     mat_block(1:nrows_blk, 1:ncols_blk))
    1072        2368 :             recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
    1073             :             ! DEALLOCATE(mat_block, STAT=stat)
    1074             :             ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
    1075             :          END DO ! ipair
    1076             :       END DO ! ipe
    1077          32 :       DEALLOCATE (mat_block)
    1078             : 
    1079             :       ! cleanup rest of the temporary arrays
    1080          32 :       DEALLOCATE (recv_buf)
    1081          32 :       DEALLOCATE (recv_sizes)
    1082          32 :       DEALLOCATE (recv_disps)
    1083          32 :       DEALLOCATE (recv_pair_count)
    1084          32 :       DEALLOCATE (recv_pair_disps)
    1085             : 
    1086          32 :       CALL timestop(handle)
    1087             : 
    1088          64 :    END SUBROUTINE fb_com_atom_pairs_gather_blks
    1089             : 
    1090             : ! ****************************************************************************
    1091             : !> \brief Given send and recv fb_com_atom_pair object, distribute the matrix
    1092             : !>        blocks stored in a fb_matrix_data object to a computable DBCSR
    1093             : !>        matrix. It is assumed in this subroutine that the sizes of each
    1094             : !>        block stored in fb_matrix_data object is consistent with the
    1095             : !>        pre-defined block sizes in the DBCSR matrix.
    1096             : !> \param matrix_storage  : the fb_matrix_data object
    1097             : !> \param atom_pairs_send : prescription on exactly which DBCSR blocks
    1098             : !>                          are to be sent to where
    1099             : !> \param atom_pairs_recv : prescription on exactly which DBCSR blocks
    1100             : !>                          are to be received from where
    1101             : !> \param para_env        : CP2K parallel environment
    1102             : !> \param dbcsr_mat : the DBCSR matrix where the matrix blocks will be
    1103             : !>                    distributed to
    1104             : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
    1105             : ! **************************************************************************************************
    1106          16 :    SUBROUTINE fb_com_atom_pairs_distribute_blks(matrix_storage, &
    1107             :                                                 atom_pairs_send, &
    1108             :                                                 atom_pairs_recv, &
    1109             :                                                 para_env, &
    1110             :                                                 dbcsr_mat)
    1111             :       TYPE(fb_matrix_data_obj), INTENT(IN)               :: matrix_storage
    1112             :       TYPE(fb_com_atom_pairs_obj), INTENT(IN)            :: atom_pairs_send, atom_pairs_recv
    1113             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1114             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
    1115             : 
    1116             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_com_atom_pairs_distribute_blks'
    1117             : 
    1118             :       INTEGER :: handle, iatom, ii, ind, ipair, ipe, jatom, jj, ncols_blk, npairs_recv, &
    1119             :          npairs_send, nrows_blk, numprocs, pe, recv_encode, send_encode
    1120          16 :       INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs_recv, pairs_send
    1121             :       INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disps, recv_pair_count, recv_pair_disps, &
    1122             :          recv_sizes, send_disps, send_pair_count, send_pair_disps, send_sizes
    1123          16 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, row_block_size_data
    1124             :       LOGICAL                                            :: check_ok, found
    1125             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_buf, send_buf
    1126          16 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
    1127             : 
    1128          16 :       CALL timeset(routineN, handle)
    1129             : 
    1130          16 :       NULLIFY (pairs_send, pairs_recv, mat_block, &
    1131          16 :                row_block_size_data, col_block_size_data)
    1132             : 
    1133          16 :       check_ok = fb_com_atom_pairs_has_data(atom_pairs_send)
    1134          16 :       CPASSERT(check_ok)
    1135          16 :       check_ok = fb_com_atom_pairs_has_data(atom_pairs_send)
    1136          16 :       CPASSERT(check_ok)
    1137          16 :       check_ok = fb_matrix_data_has_data(matrix_storage)
    1138          16 :       CPASSERT(check_ok)
    1139             : 
    1140             :       ! get com pair informations
    1141             :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_send, &
    1142             :                                  pairs=pairs_send, &
    1143             :                                  npairs=npairs_send, &
    1144          16 :                                  natoms_encode=send_encode)
    1145             :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_recv, &
    1146             :                                  pairs=pairs_recv, &
    1147             :                                  npairs=npairs_recv, &
    1148          16 :                                  natoms_encode=recv_encode)
    1149             :       ! get para_env info
    1150          16 :       numprocs = para_env%num_pe
    1151             : 
    1152             :       ! get dbcsr row and col block sizes
    1153          16 :       CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
    1154             : 
    1155             :       ! allocate temporary arrays for send
    1156          48 :       ALLOCATE (send_sizes(numprocs))
    1157          32 :       ALLOCATE (send_disps(numprocs))
    1158          32 :       ALLOCATE (send_pair_count(numprocs))
    1159          32 :       ALLOCATE (send_pair_disps(numprocs))
    1160             : 
    1161             :       ! setup send buffer sizes
    1162             :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_send, &
    1163             :                                                numprocs, &
    1164             :                                                row_block_size_data, &
    1165             :                                                col_block_size_data, &
    1166             :                                                send_sizes, &
    1167             :                                                send_disps, &
    1168             :                                                send_pair_count, &
    1169          16 :                                                send_pair_disps)
    1170             : 
    1171             :       ! allocate send buffer
    1172          80 :       ALLOCATE (send_buf(SUM(send_sizes)))
    1173             : 
    1174             :       ! allocate temporary arrays for recv
    1175          32 :       ALLOCATE (recv_sizes(numprocs))
    1176          32 :       ALLOCATE (recv_disps(numprocs))
    1177          32 :       ALLOCATE (recv_pair_count(numprocs))
    1178          32 :       ALLOCATE (recv_pair_disps(numprocs))
    1179             : 
    1180             :       ! setup recv buffer sizes
    1181             :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_recv, &
    1182             :                                                numprocs, &
    1183             :                                                row_block_size_data, &
    1184             :                                                col_block_size_data, &
    1185             :                                                recv_sizes, &
    1186             :                                                recv_disps, &
    1187             :                                                recv_pair_count, &
    1188          16 :                                                recv_pair_disps)
    1189             : 
    1190             :       ! allocate recv buffer
    1191          80 :       ALLOCATE (recv_buf(SUM(recv_sizes)))
    1192             : 
    1193             :       ! do packing
    1194          48 :       DO ipe = 1, numprocs
    1195             :          ! need to reuse send_sizes as an accumulative displacement, so recalculate
    1196          32 :          send_sizes(ipe) = 0
    1197         560 :          DO ipair = 1, send_pair_count(ipe)
    1198             :             CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
    1199         512 :                                           pe, iatom, jatom, send_encode)
    1200             :             CALL fb_matrix_data_get(matrix_storage, &
    1201             :                                     iatom, jatom, &
    1202         512 :                                     mat_block, found)
    1203         544 :             IF (.NOT. found) THEN
    1204           0 :                CPABORT("Matrix block not found")
    1205             :             ELSE
    1206         512 :                nrows_blk = row_block_size_data(iatom)
    1207         512 :                ncols_blk = col_block_size_data(jatom)
    1208        2560 :                DO jj = 1, ncols_blk
    1209       29184 :                   DO ii = 1, nrows_blk
    1210             :                      ! column major format in blocks
    1211       26624 :                      ind = send_disps(ipe) + send_sizes(ipe) + ii + (jj - 1)*nrows_blk
    1212       28672 :                      send_buf(ind) = mat_block(ii, jj)
    1213             :                   END DO ! ii
    1214             :                END DO ! jj
    1215         512 :                send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
    1216             :             END IF
    1217             :          END DO ! ipair
    1218             :       END DO ! ipe
    1219             : 
    1220             :       ! do communication
    1221             :       CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
    1222          16 :                              recv_buf, recv_sizes, recv_disps)
    1223             : 
    1224             :       ! cleanup temporary arrays no longer needed
    1225          16 :       DEALLOCATE (send_buf)
    1226          16 :       DEALLOCATE (send_sizes)
    1227          16 :       DEALLOCATE (send_disps)
    1228          16 :       DEALLOCATE (send_pair_count)
    1229          16 :       DEALLOCATE (send_pair_disps)
    1230             : 
    1231             :       ! unpack into DBCSR matrix
    1232          48 :       DO ipe = 1, numprocs
    1233          32 :          recv_sizes(ipe) = 0
    1234         560 :          DO ipair = 1, recv_pair_count(ipe)
    1235             :             CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
    1236         512 :                                           pe, iatom, jatom, recv_encode)
    1237         512 :             nrows_blk = row_block_size_data(iatom)
    1238         512 :             ncols_blk = col_block_size_data(jatom)
    1239         512 :             ind = recv_disps(ipe) + recv_sizes(ipe)
    1240             :             CALL dbcsr_put_block(dbcsr_mat, &
    1241             :                                  iatom, jatom, &
    1242         512 :                                  recv_buf((ind + 1):(ind + nrows_blk*ncols_blk)))
    1243        1056 :             recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
    1244             :          END DO ! ipair
    1245             :       END DO ! ipe
    1246             : 
    1247             :       ! cleanup rest of the temporary arrays
    1248          16 :       DEALLOCATE (recv_buf)
    1249          16 :       DEALLOCATE (recv_sizes)
    1250          16 :       DEALLOCATE (recv_disps)
    1251          16 :       DEALLOCATE (recv_pair_count)
    1252          16 :       DEALLOCATE (recv_pair_disps)
    1253             : 
    1254             :       ! dbcsr matrix is not finalised in this subroutine
    1255             : 
    1256          16 :       CALL timestop(handle)
    1257             : 
    1258          32 :    END SUBROUTINE fb_com_atom_pairs_distribute_blks
    1259             : 
    1260           0 : END MODULE qs_fb_com_tasks_types

Generated by: LCOV version 1.15