LCOV - code coverage report
Current view: top level - src/fm - cp_cfm_types.F (source / functions)
Test: CP2K Regtests (git:4dc10b3)
Date: 2024-11-21 06:45:46
Coverage summary:      Hit    Total    Coverage
    Lines:             383      436      87.8 %
    Functions:          15       20      75.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Represents a complex full matrix distributed on many processors.
      10             : !> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
      11             : ! **************************************************************************************************
      12             : MODULE cp_cfm_types
      13             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      14             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_equivalent,&
      15             :                                               cp_fm_struct_get,&
      16             :                                               cp_fm_struct_release,&
      17             :                                               cp_fm_struct_retain,&
      18             :                                               cp_fm_struct_type
      19             :    USE cp_fm_types,                     ONLY: cp_fm_type
      20             :    USE kinds,                           ONLY: dp
      21             :    USE mathconstants,                   ONLY: z_one,&
      22             :                                               z_zero
      23             :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      24             :                                               mp_any_source,&
      25             :                                               mp_para_env_type,&
      26             :                                               mp_proc_null,&
      27             :                                               mp_request_null,&
      28             :                                               mp_request_type,&
      29             :                                               mp_waitall
      30             : #include "../base/base_uses.f90"
      31             : 
      32             :    IMPLICIT NONE
      33             :    PRIVATE
      34             : 
      35             :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      36             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
      37             :    INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      38             : 
      39             :    PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type
      40             :    PUBLIC :: cp_cfm_cleanup_copy_general, &
      41             :              cp_cfm_create, &
      42             :              cp_cfm_finish_copy_general, &
      43             :              cp_cfm_get_element, &
      44             :              cp_cfm_get_info, &
      45             :              cp_cfm_get_submatrix, &
      46             :              cp_cfm_release, &
      47             :              cp_cfm_set_all, &
      48             :              cp_cfm_set_element, &
      49             :              cp_cfm_set_submatrix, &
      50             :              cp_cfm_start_copy_general, &
      51             :              cp_cfm_to_cfm, &
      52             :              cp_cfm_to_fm, &
      53             :              cp_fm_to_cfm
      54             : 
      55             :    INTERFACE cp_cfm_to_cfm
      56             :       MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
      57             :          cp_cfm_to_cfm_columns ! just a number of columns
      58             :    END INTERFACE
      59             : 
      60             : ! **************************************************************************************************
      61             : !> \brief Represent a complex full matrix.
      62             : !> \param name           the name of the matrix, used for printing
      63             : !> \param matrix_struct structure of this matrix
      64             : !> \param local_data    array with the data of the matrix (its content depends on
       65             : !>                      the matrix type used: in a parallel run it is stored in
       66             : !>                      ScaLAPACK format, in a sequential run it simply contains the matrix)
      67             : ! **************************************************************************************************
      68             :    TYPE cp_cfm_type
      69             :       CHARACTER(len=60) :: name = ""
      70             :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
      71             :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
      72             :    END TYPE cp_cfm_type
      73             : 
      74             : ! **************************************************************************************************
      75             : !> \brief Just to build arrays of pointers to matrices.
      76             : !> \param matrix the pointer to the matrix
      77             : ! **************************************************************************************************
      78             :    TYPE cp_cfm_p_type
      79             :       TYPE(cp_cfm_type), POINTER :: matrix => NULL()
      80             :    END TYPE cp_cfm_p_type
      81             : 
      82             : ! **************************************************************************************************
      83             : !> \brief Stores the state of a copy between cp_cfm_start_copy_general
      84             : !>        and cp_cfm_finish_copy_general.
      85             : !> \par History
      86             : !>      Jan 2017  derived type 'copy_info_type' has been created [Mark T]
      87             : !>      Jan 2018  the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
      88             : ! **************************************************************************************************
      89             :    TYPE copy_cfm_info_type
      90             :       !> number of MPI processes that send data
      91             :       INTEGER                                     :: send_size = -1
      92             :       !> number of locally stored rows (1) and columns (2) of the destination matrix
      93             :       INTEGER, DIMENSION(2)                       :: nlocal_recv = -1
      94             :       !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
      95             :       INTEGER, DIMENSION(2)                       :: nblock_src = -1
      96             :       !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
      97             :       INTEGER, DIMENSION(2)                       :: src_num_pe = -1
      98             :       !> displacements into recv_buf
      99             :       INTEGER, ALLOCATABLE, DIMENSION(:)          :: recv_disp
     100             :       !> MPI requests for non-blocking receive and send operations
     101             :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)          :: recv_request, send_request
     102             :       !> global column and row indices of locally stored elements of the destination matrix
     103             :       INTEGER, DIMENSION(:), POINTER              :: recv_col_indices => NULL(), recv_row_indices => NULL()
     104             :       !> rank of MPI process with BLACS coordinates (prow, pcol)
     105             :       INTEGER, ALLOCATABLE, DIMENSION(:, :)       :: src_blacs2mpi
     106             :       !> receiving and sending buffers for non-blocking MPI communication
     107             :       COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
     108             :    END TYPE copy_cfm_info_type
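
Usage illustration (editorial, not part of the module source): the copy-state type above ties a cp_cfm_start_copy_general call to the matching cp_cfm_finish_copy_general. The sketch below shows that pattern; the argument lists of the finish and cleanup routines are assumed by analogy with the start routine documented near the end of this file and with the real-valued cp_fm_* counterparts, so treat them as assumptions rather than the definitive interface.

   ! hedged sketch: redistribute matrix_a onto the (possibly different) BLACS grid of matrix_b;
   ! super_env is assumed to be an mp_para_env_type that spans both grids
   TYPE(copy_cfm_info_type) :: info

   CALL cp_cfm_start_copy_general(matrix_a, matrix_b, super_env, info)
   ! ... other work may overlap with the non-blocking sends/receives ...
   CALL cp_cfm_finish_copy_general(matrix_b, info)   ! assumed signature: destination + info
   ! processes that hold no part of the destination would instead call
   ! cp_cfm_cleanup_copy_general(info) to free their send buffers (assumed signature)
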
     109             : 
     110             : CONTAINS
     111             : 
     112             : ! **************************************************************************************************
     113             : !> \brief Creates a new full matrix with the given structure.
     114             : !> \param matrix        matrix to be created
     115             : !> \param matrix_struct structure of the matrix
     116             : !> \param name          name of the matrix
     117             : !> \note
     118             : !>      preferred allocation routine
     119             : ! **************************************************************************************************
     120      172163 :    SUBROUTINE cp_cfm_create(matrix, matrix_struct, name)
     121             :       TYPE(cp_cfm_type), INTENT(OUT)                     :: matrix
     122             :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     123             :       CHARACTER(len=*), INTENT(in), OPTIONAL             :: name
     124             : 
     125             :       INTEGER                                            :: ncol_local, npcol, nprow, nrow_local
     126             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     127             : 
     128      172163 :       context => matrix_struct%context
     129      172163 :       matrix%matrix_struct => matrix_struct
     130      172163 :       CALL cp_fm_struct_retain(matrix%matrix_struct)
     131             : 
     132      172163 :       nprow = context%num_pe(1)
     133      172163 :       npcol = context%num_pe(2)
     134      172163 :       NULLIFY (matrix%local_data)
     135             : 
     136      172163 :       nrow_local = matrix_struct%local_leading_dimension
     137      172163 :       ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     138      688652 :       ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     139             : 
     140             :       ! do not initialise created matrix as it is up to the user to do so
     141             :       !CALL zcopy(nrow_local*ncol_local, z_zero, 0, matrix%local_data, 1)
     142             : 
     143      172163 :       IF (PRESENT(name)) THEN
     144       16044 :          matrix%name = name
     145             :       ELSE
     146      156119 :          matrix%name = 'full complex matrix'
     147             :       END IF
     148      172163 :    END SUBROUTINE cp_cfm_create
     149             : 
     150             : ! **************************************************************************************************
     151             : !> \brief Releases a full matrix.
     152             : !> \param matrix the matrix to release
     153             : ! **************************************************************************************************
     154      188198 :    SUBROUTINE cp_cfm_release(matrix)
     155             :       TYPE(cp_cfm_type), INTENT(INOUT)                   :: matrix
     156             : 
     157      188198 :       IF (ASSOCIATED(matrix%local_data)) THEN
     158      172163 :          DEALLOCATE (matrix%local_data)
     159             :       END IF
     160      188198 :       matrix%name = ""
     161      188198 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     162      188198 :    END SUBROUTINE cp_cfm_release
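
Usage illustration (editorial, not part of the module): a minimal create/release lifecycle, assuming an already initialised cp_fm_struct_type object fm_struct is available.

   SUBROUTINE demo_create_release(fm_struct)
      USE cp_cfm_types, ONLY: cp_cfm_type, cp_cfm_create, cp_cfm_release
      USE cp_fm_struct, ONLY: cp_fm_struct_type
      IMPLICIT NONE
      TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: fm_struct
      TYPE(cp_cfm_type)                           :: work

      CALL cp_cfm_create(work, fm_struct, name="demo work matrix")
      ! local_data is NOT initialised by cp_cfm_create (see the note above);
      ! fill it, e.g. with cp_cfm_set_all, before using the matrix
      CALL cp_cfm_release(work)
   END SUBROUTINE demo_create_release
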
     163             : 
     164             : ! **************************************************************************************************
      165             : !> \brief Set all elements of the full matrix to alpha. In addition, set all
     166             : !>        diagonal matrix elements to beta (if given explicitly).
     167             : !> \param matrix  matrix to initialise
     168             : !> \param alpha   value of off-diagonal matrix elements
     169             : !> \param beta    value of diagonal matrix elements (equal to alpha if absent)
     170             : !> \date    12.06.2001
     171             : !> \author  Matthias Krack
     172             : !> \version 1.0
     173             : ! **************************************************************************************************
     174       43850 :    SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
     175             :       TYPE(cp_cfm_type), INTENT(IN)                   :: matrix
     176             :       COMPLEX(kind=dp), INTENT(in)                       :: alpha
     177             :       COMPLEX(kind=dp), INTENT(in), OPTIONAL             :: beta
     178             : 
     179             :       INTEGER                                            :: irow_local, nrow_local
     180             : #if defined(__parallel)
     181             :       INTEGER                                            :: icol_local, ncol_local
     182       43850 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     183             : #endif
     184             : 
     185      131550 :       CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)
     186             : 
     187       43850 :       IF (PRESENT(beta)) THEN
     188             : #if defined(__parallel)
     189             :          CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
     190        8156 :                               row_indices=row_indices, col_indices=col_indices)
     191             : 
     192        8156 :          icol_local = 1
     193        8156 :          irow_local = 1
     194             : 
     195       50613 :          DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
     196       50613 :             IF (row_indices(irow_local) < col_indices(icol_local)) THEN
     197           0 :                irow_local = irow_local + 1
     198       42457 :             ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
     199       14237 :                icol_local = icol_local + 1
     200             :             ELSE
     201       28220 :                matrix%local_data(irow_local, icol_local) = beta
     202       28220 :                irow_local = irow_local + 1
     203       28220 :                icol_local = icol_local + 1
     204             :             END IF
     205             :          END DO
     206             : #else
     207             :          nrow_local = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     208             : 
     209             :          DO irow_local = 1, nrow_local
     210             :             matrix%local_data(irow_local, irow_local) = beta
     211             :          END DO
     212             : #endif
     213             :       END IF
     214             : 
     215       43850 :    END SUBROUTINE cp_cfm_set_all
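
For instance (editorial sketch), a freshly created matrix can be initialised to the unit matrix with the routine above; z_zero and z_one are the complex (0,0) and (1,0) constants from mathconstants, already imported by this module.

   ! off-diagonal elements set to (0,0), diagonal elements to (1,0)
   CALL cp_cfm_set_all(work, z_zero, z_one)
   ! or zero the whole matrix by omitting the optional beta argument
   CALL cp_cfm_set_all(work, z_zero)
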
     216             : 
     217             : ! **************************************************************************************************
     218             : !> \brief Get the matrix element by its global index.
     219             : !> \param matrix      full matrix
     220             : !> \param irow_global global row index
     221             : !> \param icol_global global column index
     222             : !> \param alpha       matrix element
     223             : !> \par History
     224             : !>      , TCH, created
     225             : !>      always return the answer
     226             : ! **************************************************************************************************
     227        9378 :    SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
     228             :       TYPE(cp_cfm_type), INTENT(IN)                      :: matrix
     229             :       INTEGER, INTENT(in)                                :: irow_global, icol_global
     230             :       COMPLEX(kind=dp), INTENT(out)                      :: alpha
     231             : 
     232             : #if defined(__parallel)
     233             :       INTEGER                                            :: icol_local, ipcol, iprow, irow_local, &
     234             :                                                             mypcol, myprow, npcol, nprow
     235             :       INTEGER, DIMENSION(9)                              :: desca
     236             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     237             : #endif
     238             : 
     239             : #if defined(__parallel)
     240        9378 :       context => matrix%matrix_struct%context
     241        9378 :       myprow = context%mepos(1)
     242        9378 :       mypcol = context%mepos(2)
     243        9378 :       nprow = context%num_pe(1)
     244        9378 :       npcol = context%num_pe(2)
     245             : 
     246       93780 :       desca(:) = matrix%matrix_struct%descriptor(:)
     247             : 
     248             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     249        9378 :                    irow_local, icol_local, iprow, ipcol)
     250             : 
     251        9378 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     252        4689 :          alpha = matrix%local_data(irow_local, icol_local)
     253        4689 :          CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
     254             :       ELSE
     255        4689 :          CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     256             :       END IF
     257             : 
     258             : #else
     259             :       alpha = matrix%local_data(irow_global, icol_global)
     260             : #endif
     261             : 
     262        9378 :    END SUBROUTINE cp_cfm_get_element
     263             : 
     264             : ! **************************************************************************************************
     265             : !> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
     266             : !> \param matrix      full matrix
     267             : !> \param irow_global global row index
     268             : !> \param icol_global global column index
     269             : !> \param alpha       value of the matrix element
     270             : !> \date    12.06.2001
     271             : !> \author  Matthias Krack
     272             : !> \version 1.0
     273             : ! **************************************************************************************************
     274       79956 :    SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
     275             :       TYPE(cp_cfm_type), INTENT(IN)                   :: matrix
     276             :       INTEGER, INTENT(in)                                :: irow_global, icol_global
     277             :       COMPLEX(kind=dp), INTENT(in)                       :: alpha
     278             : 
     279             : #if defined(__parallel)
     280             :       INTEGER                                            :: icol_local, ipcol, iprow, irow_local, &
     281             :                                                             mypcol, myprow, npcol, nprow
     282             :       INTEGER, DIMENSION(9)                              :: desca
     283             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     284             : #endif
     285             : 
     286             : #if defined(__parallel)
     287       79956 :       context => matrix%matrix_struct%context
     288       79956 :       myprow = context%mepos(1)
     289       79956 :       mypcol = context%mepos(2)
     290       79956 :       nprow = context%num_pe(1)
     291       79956 :       npcol = context%num_pe(2)
     292             : 
     293      799560 :       desca(:) = matrix%matrix_struct%descriptor(:)
     294             : 
     295             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     296       79956 :                    irow_local, icol_local, iprow, ipcol)
     297             : 
     298       79956 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     299       39978 :          matrix%local_data(irow_local, icol_local) = alpha
     300             :       END IF
     301             : 
     302             : #else
     303             :       matrix%local_data(irow_global, icol_global) = alpha
     304             : #endif
     305             : 
     306       79956 :    END SUBROUTINE cp_cfm_set_element
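
Editorial sketch of single-element access with the two routines above: cp_cfm_get_element broadcasts the value, so every process receives it, while cp_cfm_set_element only updates the element on the process that owns it.

   COMPLEX(kind=dp) :: aij

   CALL cp_cfm_set_element(matrix, 2, 3, (1.0_dp, -0.5_dp))   ! write element (2,3)
   CALL cp_cfm_get_element(matrix, 2, 3, aij)                 ! read it back on all processes
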
     307             : 
     308             : ! **************************************************************************************************
     309             : !> \brief Extract a sub-matrix from the full matrix:
      310             : !>        op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows-1,start_col:start_col+n_cols-1).
     311             : !>        Sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
     312             : !> \param fm          full matrix you want to get the elements from
     313             : !> \param target_m    2-D array to store the extracted sub-matrix
     314             : !> \param start_row   global row index of the matrix element target_m(1,1) (defaults to 1)
     315             : !> \param start_col   global column index of the matrix element target_m(1,1) (defaults to 1)
     316             : !> \param n_rows      number of rows to extract (defaults to size(op(target_m),1))
     317             : !> \param n_cols      number of columns to extract (defaults to size(op(target_m),2))
     318             : !> \param transpose   indicates that the extracted sub-matrix target_m should be transposed:
     319             : !>                    op(target_m) = target_m^T if .TRUE.,
     320             : !>                    op(target_m) = target_m   if .FALSE. (defaults to false)
     321             : !> \par History
     322             : !>   * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
     323             : !>   * 01.2018 drop innermost conditional branching [Sergey Chulkov]
     324             : !> \author Lianheng Tong
     325             : !> \note
     326             : !>      Optimized for full column updates. The matrix target_m is replicated and valid on all CPUs.
     327             : ! **************************************************************************************************
     328         392 :    SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
     329             :       TYPE(cp_cfm_type), INTENT(IN)                      :: fm
     330             :       COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out)     :: target_m
     331             :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     332             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     333             : 
     334             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_get_submatrix'
     335             : 
     336         392 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: local_data
     337             :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
     338             :          ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
     339             :          start_row_global, start_row_local, this_col
     340         392 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     341             :       LOGICAL                                            :: do_zero, tr_a
     342             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     343             : 
     344         392 :       CALL timeset(routineN, handle)
     345             : 
     346        1176 :       IF (SIZE(target_m) /= 0) THEN
     347             : #if defined(__parallel)
     348         392 :          do_zero = .TRUE.
     349             : #else
     350             :          do_zero = .FALSE.
     351             : #endif
     352             : 
     353         392 :          tr_a = .FALSE.
     354         392 :          IF (PRESENT(transpose)) tr_a = transpose
     355             : 
     356             :          ! find out the first and last global row/column indices
     357         392 :          start_row_global = 1
     358         392 :          start_col_global = 1
     359         392 :          IF (PRESENT(start_row)) start_row_global = start_row
     360         392 :          IF (PRESENT(start_col)) start_col_global = start_col
     361             : 
     362         392 :          IF (tr_a) THEN
     363           0 :             end_row_global = SIZE(target_m, 2)
     364           0 :             end_col_global = SIZE(target_m, 1)
     365             :          ELSE
     366         392 :             end_row_global = SIZE(target_m, 1)
     367         392 :             end_col_global = SIZE(target_m, 2)
     368             :          END IF
     369         392 :          IF (PRESENT(n_rows)) end_row_global = n_rows
     370         392 :          IF (PRESENT(n_cols)) end_col_global = n_cols
     371             : 
     372         392 :          end_row_global = end_row_global + start_row_global - 1
     373         392 :          end_col_global = end_col_global + start_col_global - 1
     374             : 
     375             :          CALL cp_cfm_get_info(matrix=fm, &
     376             :                               nrow_global=nrow_global, ncol_global=ncol_global, &
     377             :                               nrow_local=nrow_local, ncol_local=ncol_local, &
     378         392 :                               row_indices=row_indices, col_indices=col_indices)
     379         392 :          IF (end_row_global > nrow_global) THEN
     380             :             end_row_global = nrow_global
     381             :             do_zero = .TRUE.
     382             :          END IF
     383         392 :          IF (end_col_global > ncol_global) THEN
     384             :             end_col_global = ncol_global
     385             :             do_zero = .TRUE.
     386             :          END IF
     387             : 
      388             :          ! find out row/column indices of locally stored matrix elements that need to be copied.
     389             :          ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
     390        1868 :          DO start_row_local = 1, nrow_local
     391        1868 :             IF (row_indices(start_row_local) >= start_row_global) EXIT
     392             :          END DO
     393             : 
     394        6237 :          DO end_row_local = start_row_local, nrow_local
     395        6237 :             IF (row_indices(end_row_local) > end_row_global) EXIT
     396             :          END DO
     397         392 :          end_row_local = end_row_local - 1
     398             : 
     399        3898 :          DO start_col_local = 1, ncol_local
     400        3898 :             IF (col_indices(start_col_local) >= start_col_global) EXIT
     401             :          END DO
     402             : 
     403        9734 :          DO end_col_local = start_col_local, ncol_local
     404        9734 :             IF (col_indices(end_col_local) > end_col_global) EXIT
     405             :          END DO
     406         392 :          end_col_local = end_col_local - 1
     407             : 
     408         392 :          para_env => fm%matrix_struct%para_env
     409         392 :          local_data => fm%local_data
     410             : 
     411             :          ! wipe the content of the target matrix if:
     412             :          !  * the source matrix is distributed across a number of processes, or
     413             :          !  * not all elements of the target matrix will be assigned, e.g.
      414             :          !        when the target matrix is larger than the source matrix
     415             :          IF (do_zero) &
     416        1176 :             CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)
     417             : 
     418         392 :          IF (tr_a) THEN
     419           0 :             DO j = start_col_local, end_col_local
     420           0 :                this_col = col_indices(j) - start_col_global + 1
     421           0 :                DO i = start_row_local, end_row_local
     422           0 :                   target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
     423             :                END DO
     424             :             END DO
     425             :          ELSE
     426        9734 :             DO j = start_col_local, end_col_local
     427        9342 :                this_col = col_indices(j) - start_col_global + 1
     428      147316 :                DO i = start_row_local, end_row_local
     429      146924 :                   target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
     430             :                END DO
     431             :             END DO
     432             :          END IF
     433             : 
     434      577852 :          CALL para_env%sum(target_m)
     435             :       END IF
     436             : 
     437         392 :       CALL timestop(handle)
     438         392 :    END SUBROUTINE cp_cfm_get_submatrix
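
Editorial sketch: replicating a small block of the distributed matrix on every process with the routine above (fm is assumed to be at least 4x4).

   COMPLEX(kind=dp), DIMENSION(4, 4) :: block

   ! block(i,j) = fm(i,j) for i,j = 1..4, valid on all processes afterwards
   CALL cp_cfm_get_submatrix(fm, block, start_row=1, start_col=1, n_rows=4, n_cols=4)
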
     439             : 
     440             : ! **************************************************************************************************
     441             : !> \brief Set a sub-matrix of the full matrix:
      442             : !>       matrix(start_row:start_row+n_rows-1,start_col:start_col+n_cols-1)
      443             : !>       = alpha*op(new_values)(1:n_rows,1:n_cols) +
      444             : !>         beta*matrix(start_row:start_row+n_rows-1,start_col:start_col+n_cols-1)
      445             : !> \param matrix      full matrix to update
     446             : !> \param new_values  replicated 2-D array that holds new elements of the updated sub-matrix
     447             : !> \param start_row   global row index of the matrix element new_values(1,1) (defaults to 1)
     448             : !> \param start_col   global column index of the matrix element new_values(1,1) (defaults to 1)
     449             : !> \param n_rows      number of rows to update (defaults to size(op(new_values),1))
     450             : !> \param n_cols      number of columns to update (defaults to size(op(new_values),2))
     451             : !> \param alpha       scale factor for the new values (defaults to (1.0,0.0))
     452             : !> \param beta        scale factor for the old values (defaults to (0.0,0.0))
     453             : !> \param transpose   indicates that the matrix new_values should be transposed:
     454             : !>                    op(new_values) = new_values^T if .TRUE.,
     455             : !>                    op(new_values) = new_values   if .FALSE. (defaults to false)
     456             : !> \par History
     457             : !>   * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
     458             : !>   * 01.2018 drop innermost conditional branching [Sergey Chulkov]
     459             : !> \author Lianheng Tong
     460             : !> \note
     461             : !>      Optimized for alpha=(1.0,0.0), beta=(0.0,0.0)
      462             : !>      All matrix elements of 'new_values' must be valid on all CPUs
     463             : ! **************************************************************************************************
     464         240 :    SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
     465             :                                    start_col, n_rows, n_cols, alpha, beta, transpose)
     466             :       TYPE(cp_cfm_type), INTENT(IN)                      :: matrix
     467             :       COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in)      :: new_values
     468             :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     469             :       COMPLEX(kind=dp), INTENT(in), OPTIONAL             :: alpha, beta
     470             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     471             : 
     472             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_set_submatrix'
     473             : 
     474             :       COMPLEX(kind=dp)                                   :: al, be
     475         240 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: local_data
     476             :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
     477             :          ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
     478             :          start_row_global, start_row_local, this_col
     479         240 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     480             :       LOGICAL                                            :: tr_a
     481             : 
     482         240 :       CALL timeset(routineN, handle)
     483             : 
     484         240 :       al = z_one
     485         240 :       be = z_zero
     486         240 :       IF (PRESENT(alpha)) al = alpha
     487         240 :       IF (PRESENT(beta)) be = beta
     488             : 
     489             :       ! find out the first and last global row/column indices
     490         240 :       start_row_global = 1
     491         240 :       start_col_global = 1
     492         240 :       IF (PRESENT(start_row)) start_row_global = start_row
     493         240 :       IF (PRESENT(start_col)) start_col_global = start_col
     494             : 
     495         240 :       tr_a = .FALSE.
     496         240 :       IF (PRESENT(transpose)) tr_a = transpose
     497             : 
     498           0 :       IF (tr_a) THEN
     499           0 :          end_row_global = SIZE(new_values, 2)
     500           0 :          end_col_global = SIZE(new_values, 1)
     501             :       ELSE
     502         240 :          end_row_global = SIZE(new_values, 1)
     503         240 :          end_col_global = SIZE(new_values, 2)
     504             :       END IF
     505         240 :       IF (PRESENT(n_rows)) end_row_global = n_rows
     506         240 :       IF (PRESENT(n_cols)) end_col_global = n_cols
     507             : 
     508         240 :       end_row_global = end_row_global + start_row_global - 1
     509         240 :       end_col_global = end_col_global + start_col_global - 1
     510             : 
     511             :       CALL cp_cfm_get_info(matrix=matrix, &
     512             :                            nrow_global=nrow_global, ncol_global=ncol_global, &
     513             :                            nrow_local=nrow_local, ncol_local=ncol_local, &
     514         240 :                            row_indices=row_indices, col_indices=col_indices)
     515         240 :       IF (end_row_global > nrow_global) end_row_global = nrow_global
     516         240 :       IF (end_col_global > ncol_global) end_col_global = ncol_global
     517             : 
      518             :       ! find out row/column indices of locally stored matrix elements that need to be set.
     519             :       ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
     520         978 :       DO start_row_local = 1, nrow_local
     521         978 :          IF (row_indices(start_row_local) >= start_row_global) EXIT
     522             :       END DO
     523             : 
     524        2919 :       DO end_row_local = start_row_local, nrow_local
     525        2919 :          IF (row_indices(end_row_local) > end_row_global) EXIT
     526             :       END DO
     527         240 :       end_row_local = end_row_local - 1
     528             : 
     529         944 :       DO start_col_local = 1, ncol_local
     530         944 :          IF (col_indices(start_col_local) >= start_col_global) EXIT
     531             :       END DO
     532             : 
     533        6698 :       DO end_col_local = start_col_local, ncol_local
     534        6698 :          IF (col_indices(end_col_local) > end_col_global) EXIT
     535             :       END DO
     536         240 :       end_col_local = end_col_local - 1
     537             : 
     538         240 :       local_data => matrix%local_data
     539             : 
     540         240 :       IF (al == z_one .AND. be == z_zero) THEN
     541         240 :          IF (tr_a) THEN
     542           0 :             DO j = start_col_local, end_col_local
     543           0 :                this_col = col_indices(j) - start_col_global + 1
     544           0 :                DO i = start_row_local, end_row_local
     545           0 :                   local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
     546             :                END DO
     547             :             END DO
     548             :          ELSE
     549        6698 :             DO j = start_col_local, end_col_local
     550        6458 :                this_col = col_indices(j) - start_col_global + 1
     551       84098 :                DO i = start_row_local, end_row_local
     552       83858 :                   local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
     553             :                END DO
     554             :             END DO
     555             :          END IF
     556             :       ELSE
     557           0 :          IF (tr_a) THEN
     558           0 :             DO j = start_col_local, end_col_local
     559           0 :                this_col = col_indices(j) - start_col_global + 1
     560           0 :                DO i = start_row_local, end_row_local
     561             :                   local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
     562           0 :                                      be*local_data(i, j)
     563             :                END DO
     564             :             END DO
     565             :          ELSE
     566           0 :             DO j = start_col_local, end_col_local
     567           0 :                this_col = col_indices(j) - start_col_global + 1
     568           0 :                DO i = start_row_local, end_row_local
     569             :                   local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
     570           0 :                                      be*local_data(i, j)
     571             :                END DO
     572             :             END DO
     573             :          END IF
     574             :       END IF
     575             : 
     576         240 :       CALL timestop(handle)
     577         240 :    END SUBROUTINE cp_cfm_set_submatrix
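
Continuing the editorial sketch from cp_cfm_get_submatrix, writing the replicated block back is symmetric; with the default alpha=(1,0) and beta=(0,0) the block simply overwrites the corresponding part of the distributed matrix.

   CALL cp_cfm_set_submatrix(matrix, block, start_row=1, start_col=1, n_rows=4, n_cols=4)
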
     578             : 
     579             : ! **************************************************************************************************
     580             : !> \brief Returns information about a full matrix.
     581             : !> \param matrix        matrix
     582             : !> \param name          name of the matrix
     583             : !> \param nrow_global   total number of rows
     584             : !> \param ncol_global   total number of columns
     585             : !> \param nrow_block    number of rows per ScaLAPACK block
     586             : !> \param ncol_block    number of columns per ScaLAPACK block
     587             : !> \param nrow_local    number of locally stored rows
     588             : !> \param ncol_local    number of locally stored columns
     589             : !> \param row_indices   global indices of locally stored rows
     590             : !> \param col_indices   global indices of locally stored columns
     591             : !> \param local_data    locally stored matrix elements
     592             : !> \param context       BLACS context
     593             : !> \param matrix_struct matrix structure
     594             : !> \param para_env      parallel environment
     595             : !> \date    12.06.2001
     596             : !> \author  Matthias Krack
     597             : !> \version 1.0
     598             : ! **************************************************************************************************
     599      246658 :    SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
     600             :                               nrow_block, ncol_block, nrow_local, ncol_local, &
     601             :                               row_indices, col_indices, local_data, context, &
     602             :                               matrix_struct, para_env)
     603             :       TYPE(cp_cfm_type), INTENT(IN)                      :: matrix
     604             :       CHARACTER(len=*), INTENT(OUT), OPTIONAL            :: name
     605             :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
     606             :                                                             ncol_block, nrow_local, ncol_local
     607             :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
     608             :       COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
     609             :          OPTIONAL, POINTER                               :: local_data
     610             :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
     611             :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
     612             :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
     613             : 
     614           0 :       IF (PRESENT(name)) name = matrix%name
     615      246658 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
     616      246658 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
     617             : 
     618             :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
     619             :                             ncol_local=ncol_local, nrow_global=nrow_global, &
     620             :                             ncol_global=ncol_global, nrow_block=nrow_block, &
     621             :                             ncol_block=ncol_block, context=context, &
     622      246658 :                             row_indices=row_indices, col_indices=col_indices, para_env=para_env)
     623             : 
     624      246658 :    END SUBROUTINE cp_cfm_get_info
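
Editorial sketch: a typical way to loop over the locally stored part of a distributed matrix using the query routine above.

   INTEGER                        :: i, j, nrow_local, ncol_local
   INTEGER, DIMENSION(:), POINTER :: row_indices, col_indices

   CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
                        row_indices=row_indices, col_indices=col_indices)
   DO j = 1, ncol_local
      DO i = 1, nrow_local
         ! matrix%local_data(i, j) holds the global element
         ! (row_indices(i), col_indices(j)) of the full matrix
      END DO
   END DO
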
     625             : 
     626             : ! **************************************************************************************************
     627             : !> \brief Copy content of a full matrix into another full matrix of the same size.
     628             : !> \param source      source matrix
     629             : !> \param destination destination matrix
     630             : !> \author Joost VandeVondele
     631             : ! **************************************************************************************************
     632      216852 :    SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
     633             :       TYPE(cp_cfm_type), INTENT(IN)                      :: source, destination
     634             : 
     635             :       INTEGER                                            :: npcol, nprow
     636             : 
     637      216852 :       nprow = source%matrix_struct%context%num_pe(1)
     638      216852 :       npcol = source%matrix_struct%context%num_pe(2)
     639             : 
     640      216852 :       IF (.NOT. cp2k_is_parallel .OR. &
     641             :           cp_fm_struct_equivalent(source%matrix_struct, &
     642             :                                   destination%matrix_struct)) THEN
     643      216852 :          IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
     644             :              SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
     645           0 :             CPABORT("internal local_data has different sizes")
     646      650556 :          CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
     647             :       ELSE
     648           0 :          IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
      649           0 :             CPABORT("cannot copy between full matrices of different sizes")
     650           0 :          IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
      651           0 :             CPABORT("cannot copy between full matrices of different sizes")
     652             : #if defined(__parallel)
     653             :          CALL pzcopy(source%matrix_struct%nrow_global* &
     654             :                      source%matrix_struct%ncol_global, &
     655             :                      source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
     656           0 :                      destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
     657             : #else
     658             :          CPABORT("")
     659             : #endif
     660             :       END IF
     661      216852 :    END SUBROUTINE cp_cfm_to_cfm_matrix
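
Editorial note: through the generic interface cp_cfm_to_cfm declared above, a whole-matrix copy is simply the two-argument call below (matrix_a and matrix_b are assumed to be existing cp_cfm_type objects with equivalent structures).

   CALL cp_cfm_to_cfm(matrix_a, matrix_b)
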
     662             : 
     663             : ! **************************************************************************************************
     664             : !> \brief Copy a number of sequential columns of a full matrix into another full matrix.
     665             : !> \param msource      source matrix
     666             : !> \param mtarget      destination matrix
     667             : !> \param ncol         number of columns to copy
     668             : !> \param source_start global index of the first column to copy within the source matrix
     669             : !> \param target_start global index of the first column to copy within the destination matrix
     670             : ! **************************************************************************************************
     671       15466 :    SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
     672             :                                     target_start)
     673             : 
     674             :       TYPE(cp_cfm_type), INTENT(IN)                   :: msource, mtarget
     675             :       INTEGER, INTENT(IN)                                :: ncol
     676             :       INTEGER, INTENT(IN), OPTIONAL                      :: source_start, target_start
     677             : 
     678             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_columns'
     679             : 
     680       15466 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: a, b
     681             :       INTEGER                                            :: handle, n, ss, ts
     682             : #if defined(__parallel)
     683             :       INTEGER                                            :: i
     684             :       INTEGER, DIMENSION(9)                              :: desca, descb
     685             : #endif
     686             : 
     687       15466 :       CALL timeset(routineN, handle)
     688             : 
     689       15466 :       ss = 1
     690       15466 :       ts = 1
     691             : 
     692       15466 :       IF (PRESENT(source_start)) ss = source_start
     693       15466 :       IF (PRESENT(target_start)) ts = target_start
     694             : 
     695       15466 :       n = msource%matrix_struct%nrow_global
     696             : 
     697       15466 :       a => msource%local_data
     698       15466 :       b => mtarget%local_data
     699             : 
     700             : #if defined(__parallel)
     701      154660 :       desca(:) = msource%matrix_struct%descriptor(:)
     702      154660 :       descb(:) = mtarget%matrix_struct%descriptor(:)
     703      304180 :       DO i = 0, ncol - 1
     704      304180 :          CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
     705             :       END DO
     706             : #else
     707             :       CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
     708             : #endif
     709             : 
     710       15466 :       CALL timestop(handle)
     711             : 
     712       15466 :    END SUBROUTINE cp_cfm_to_cfm_columns
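
Editorial sketch: the same generic interface resolves to the column variant when a column count is given, e.g. copying five columns of matrix_a, starting at its column 3, into the first five columns of matrix_b:

   CALL cp_cfm_to_cfm(matrix_a, matrix_b, ncol=5, source_start=3, target_start=1)
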
     713             : 
     714             : ! **************************************************************************************************
     715             : !> \brief Copy just a triangular matrix.
     716             : !> \param msource source matrix
     717             : !> \param mtarget target matrix
     718             : !> \param uplo    'U' for upper triangular, 'L' for lower triangular
     719             : ! **************************************************************************************************
     720           0 :    SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
     721             :       TYPE(cp_cfm_type), INTENT(IN)                   :: msource, mtarget
     722             :       CHARACTER(len=*), INTENT(IN)                       :: uplo
     723             : 
     724             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_triangular'
     725             : 
     726           0 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: aa, bb
     727             :       INTEGER                                            :: handle, ncol, nrow
     728             : #if defined(__parallel)
     729             :       INTEGER, DIMENSION(9)                              :: desca, descb
     730             : #endif
     731             : 
     732           0 :       CALL timeset(routineN, handle)
     733             : 
     734           0 :       nrow = msource%matrix_struct%nrow_global
     735           0 :       ncol = msource%matrix_struct%ncol_global
     736             : 
     737           0 :       aa => msource%local_data
     738           0 :       bb => mtarget%local_data
     739             : 
     740             : #if defined(__parallel)
     741           0 :       desca(:) = msource%matrix_struct%descriptor(:)
     742           0 :       descb(:) = mtarget%matrix_struct%descriptor(:)
     743           0 :       CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
     744             : #else
     745             :       CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
     746             : #endif
     747             : 
     748           0 :       CALL timestop(handle)
     749           0 :    END SUBROUTINE cp_cfm_to_cfm_triangular
     750             : 
     751             : ! **************************************************************************************************
     752             : !> \brief Copy real and imaginary parts of a complex full matrix into
     753             : !>        separate real-value full matrices.
     754             : !> \param msource    complex matrix
     755             : !> \param mtargetr   (optional) real part of the source matrix
     756             : !> \param mtargeti   (optional) imaginary part of the source matrix
     757             : !> \note
     758             : !>        Matrix structures are assumed to be equivalent.
     759             : ! **************************************************************************************************
     760       74318 :    SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)
     761             : 
     762             :       TYPE(cp_cfm_type), INTENT(IN)                      :: msource
     763             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: mtargetr, mtargeti
     764             : 
     765             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_cfm_to_fm'
     766             : 
     767       74318 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: zmat
     768             :       INTEGER                                            :: handle
     769       74318 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: imat, rmat
     770             : 
     771       74318 :       CALL timeset(routineN, handle)
     772             : 
     773       74318 :       zmat => msource%local_data
     774       74318 :       IF (PRESENT(mtargetr)) THEN
     775       69856 :          rmat => mtargetr%local_data
     776             :          IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
     777       69856 :              (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
     778             :              (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
      779           0 :             CPABORT("size of local_data of mtargetr differs from msource")
     780             :          END IF
     781             :          ! copy local data
     782    35741121 :          rmat = REAL(zmat, kind=dp)
     783             :       ELSE
     784       74318 :          NULLIFY (rmat)
     785             :       END IF
     786       74318 :       IF (PRESENT(mtargeti)) THEN
     787       59734 :          imat => mtargeti%local_data
     788             :          IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
     789       59734 :              (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
     790             :              (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
      791           0 :             CPABORT("size of local_data of mtargeti differs from msource")
     792             :          END IF
     793             :          ! copy local data
     794    35267023 :          imat = REAL(AIMAG(zmat), kind=dp)
     795             :       ELSE
     796       74318 :          NULLIFY (imat)
     797             :       END IF
     798             : 
     799       74318 :       CALL timestop(handle)
     800             : 
     801       74318 :    END SUBROUTINE cp_cfm_to_fm
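
Editorial sketch: splitting a complex matrix into its real and imaginary parts with the routine above, assuming fm_re and fm_im are cp_fm_type matrices with a structure equivalent to cmat.

   CALL cp_cfm_to_fm(cmat, mtargetr=fm_re, mtargeti=fm_im)
   ! either target may be omitted if only one part is needed, e.g.
   CALL cp_cfm_to_fm(cmat, mtargetr=fm_re)
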
     802             : 
     803             : ! **************************************************************************************************
     804             : !> \brief Construct a complex full matrix by taking its real and imaginary parts from
     805             : !>        two separate real-value full matrices.
     806             : !> \param msourcer   (optional) real part of the complex matrix (defaults to 0.0)
     807             : !> \param msourcei   (optional) imaginary part of the complex matrix (defaults to 0.0)
     808             : !> \param mtarget    resulting complex matrix
     809             : !> \note
     810             : !>        Matrix structures are assumed to be equivalent.
     811             : ! **************************************************************************************************
     812      100947 :    SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
     813             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: msourcer, msourcei
     814             :       TYPE(cp_cfm_type), INTENT(IN)                      :: mtarget
     815             : 
     816             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_to_cfm'
     817             : 
     818      100947 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: zmat
     819             :       INTEGER                                            :: handle, mode
     820      100947 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: imat, rmat
     821             : 
     822      100947 :       CALL timeset(routineN, handle)
     823             : 
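                      :       ! mode encodes which optional sources are present:
                      :       ! +1 if msourcer (real part) is given, +2 if msourcei (imaginary part) is given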
     824      100947 :       mode = 0
     825      100947 :       zmat => mtarget%local_data
     826      100947 :       IF (PRESENT(msourcer)) THEN
     827       98499 :          rmat => msourcer%local_data
     828             :          IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
     829       98499 :              (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
     830             :              (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
      831           0 :             CPABORT("size of local_data of msourcer differs from mtarget")
     832             :          END IF
     833             :          mode = mode + 1
     834             :       ELSE
     835             :          NULLIFY (rmat)
     836             :       END IF
     837      100947 :       IF (PRESENT(msourcei)) THEN
     838       55080 :          imat => msourcei%local_data
     839             :          IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
     840       55080 :              (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
     841             :              (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
      842           0 :             CPABORT("size of local_data of msourcei differs from mtarget")
     843             :          END IF
     844       55080 :          mode = mode + 2
     845             :       ELSE
     846             :          NULLIFY (imat)
     847             :       END IF
     848             :       ! copy local data
     849             :       SELECT CASE (mode)
     850             :       CASE (0)
     851           0 :          zmat(:, :) = z_zero
     852             :       CASE (1)
     853     5854228 :          zmat(:, :) = CMPLX(rmat(:, :), 0.0_dp, kind=dp)
     854             :       CASE (2)
     855       12240 :          zmat(:, :) = CMPLX(0.0_dp, imat(:, :), kind=dp)
     856             :       CASE (3)
     857    20042161 :          zmat(:, :) = CMPLX(rmat(:, :), imat(:, :), kind=dp)
     858             :       END SELECT
     859             : 
     860      100947 :       CALL timestop(handle)
     861             : 
     862      100947 :    END SUBROUTINE cp_fm_to_cfm
     863             : 
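                      : ! Usage sketch (illustrative only; re_part, im_part and cmat are hypothetical
                      : ! matrices assumed to have equivalent matrix structs):
                      : !
                      : !    CALL cp_fm_to_cfm(msourcer=re_part, msourcei=im_part, mtarget=cmat)
                      : !
                      : ! Omitting msourcei yields a purely real complex matrix; omitting both sources
                      : ! sets mtarget to zero.
                      : 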
     864             : ! **************************************************************************************************
      865             : !> \brief Initiate the copy operation: get distribution data, post MPI isends and irecvs.
     866             : !> \param source       input complex-valued fm matrix
     867             : !> \param destination  output complex-valued fm matrix
     868             : !> \param para_env     parallel environment corresponding to the BLACS env that covers all parts
     869             : !>                     of the input and output matrices
     870             : !> \param info         all of the data that will be needed to complete the copy operation
     871             : !> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
     872             : !>       allocatable arrays instead of pointers wherever possible.
     873             : ! **************************************************************************************************
     874       91000 :    SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
     875             :       TYPE(cp_cfm_type), INTENT(IN)                      :: source, destination
     876             :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env
     877             :       TYPE(copy_cfm_info_type), INTENT(out)              :: info
     878             : 
     879             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_start_copy_general'
     880             : 
     881             :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
     882             :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
     883             :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
     884             :          recv_rank, recv_size, send_rank, send_size
     885        9100 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
     886       18200 :                                                             recv_count, send_count, send_disp, &
     887        9100 :                                                             source2global, src_p, src_q
     888        9100 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
     889             :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
     890             :                                                             src_block, src_block_tmp, src_num_pe
     891       18200 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
     892       18200 :                                                             send_col_indices, send_row_indices
     893             :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
     894      127400 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
     895             : 
     896        9100 :       CALL timeset(routineN, handle)
     897             : 
     898             :       IF (.NOT. cp2k_is_parallel) THEN
     899             :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
     900             :          nrow_local_send = SIZE(source%local_data, 1)
     901             :          ncol_local_send = SIZE(source%local_data, 2)
     902             :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
     903             :          k = 0
     904             :          DO j = 1, ncol_local_send
     905             :             DO i = 1, nrow_local_send
     906             :                k = k + 1
     907             :                info%send_buf(k) = source%local_data(i, j)
     908             :             END DO
     909             :          END DO
     910             :       ELSE
     911        9100 :          NULLIFY (recv_dist, send_dist)
     912        9100 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
     913             : 
     914             :          ! The 'global' communicator contains both the source and destination decompositions
     915        9100 :          global_size = para_env%num_pe
     916        9100 :          global_rank = para_env%mepos
     917             : 
      918             :          ! The source/send and destination/recv decompositions may exist only on
      919             :          ! a subset of the processes involved in the communication
     920             :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
     921             :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
     922        9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
     923        9100 :             recv_dist => destination%matrix_struct
     924        9100 :             recv_rank = recv_dist%para_env%mepos
     925             :          ELSE
     926           0 :             recv_rank = mp_proc_null
     927             :          END IF
     928             : 
     929        9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
     930        4550 :             send_dist => source%matrix_struct
     931        4550 :             send_rank = send_dist%para_env%mepos
     932             :          ELSE
     933        4550 :             send_rank = mp_proc_null
     934             :          END IF
     935             : 
     936             :          ! Map the rank in the source/dest communicator to the global rank
     937       27300 :          ALLOCATE (all_ranks(0:global_size - 1))
     938             : 
     939        9100 :          CALL para_env%allgather(send_rank, all_ranks)
     940        9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
     941       45500 :             ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
     942       27300 :             DO i = 0, global_size - 1
     943       27300 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
     944        9100 :                   source2global(all_ranks(i)) = i
     945             :                END IF
     946             :             END DO
     947             :          END IF
     948             : 
     949        9100 :          CALL para_env%allgather(recv_rank, all_ranks)
     950        9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
     951       22750 :             ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
     952       13650 :             DO i = 0, global_size - 1
     953       13650 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
     954        9100 :                   dest2global(all_ranks(i)) = i
     955             :                END IF
     956             :             END DO
     957             :          END IF
     958        9100 :          DEALLOCATE (all_ranks)
     959             : 
      960             :          ! Some data from the two decompositions will be needed by all processes in the global group:
     961             :          ! process grid shape, block size, and the BLACS-to-MPI mapping
     962             : 
     963             :          ! The global root process will receive the data (from the root process in each decomposition)
     964       63700 :          send_req(:) = mp_request_null
     965        9100 :          IF (global_rank == 0) THEN
     966       31850 :             recv_req(:) = mp_request_null
     967        4550 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
     968        4550 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
     969        4550 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
     970        4550 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
     971             :          END IF
     972             : 
     973        9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
     974        4550 :             IF ((send_rank == 0)) THEN
     975             :                ! need to use separate buffers here in case this is actually global rank 0
     976       13650 :                src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
     977        4550 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
     978        4550 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
     979             :             END IF
     980             :          END IF
     981             : 
     982        9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
     983        9100 :             IF ((recv_rank == 0)) THEN
     984       13650 :                dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
     985        4550 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
     986        4550 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
     987             :             END IF
     988             :          END IF
     989             : 
     990        9100 :          IF (global_rank == 0) THEN
     991        4550 :             CALL mp_waitall(recv_req(1:4))
     992             :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
     993           0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
     994       31850 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
     995        4550 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
     996        4550 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
     997             :          END IF
     998             : 
     999        9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1000        4550 :             IF ((send_rank == 0)) THEN
    1001        4550 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1002             :             END IF
    1003             :          END IF
    1004             : 
    1005        9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1006        9100 :             IF ((recv_rank == 0)) THEN
    1007        4550 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1008             :             END IF
    1009             :          END IF
    1010             : 
    1011        9100 :          IF (global_rank == 0) THEN
    1012        4550 :             CALL mp_waitall(recv_req(5:6))
    1013             :          END IF
    1014             : 
    1015             :          ! Finally, broadcast the data to all processes in the global communicator
    1016        9100 :          CALL para_env%bcast(src_block, 0)
    1017        9100 :          CALL para_env%bcast(dest_block, 0)
    1018        9100 :          CALL para_env%bcast(src_num_pe, 0)
    1019        9100 :          CALL para_env%bcast(dest_num_pe, 0)
    1020       27300 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1021       27300 :          info%nblock_src(1:2) = src_block(1:2)
    1022        9100 :          IF (global_rank /= 0) THEN
    1023           0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1024       31850 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
    1025             :          END IF
    1026        9100 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1027        9100 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1028             : 
    1029        9100 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1030        9100 :          send_size = src_num_pe(1)*src_num_pe(2)
    1031        9100 :          info%send_size = send_size
    1032        9100 :          CALL mp_waitall(send_req(:))
    1033             : 
    1034             :          ! Setup is now complete, we can start the actual communication here.
    1035             :          ! The order implemented here is:
    1036             :          !  DEST_1
    1037             :          !      compute recv sizes
    1038             :          !      call irecv
    1039             :          !  SRC_1
    1040             :          !      compute send sizes
    1041             :          !      pack send buffers
    1042             :          !      call isend
    1043             :          !  DEST_2
     1044             :          !      wait for the recvs and unpack buffers (this part will eventually go into another routine to allow comms to run concurrently)
    1045             :          !  SRC_2
    1046             :          !      wait for the sends
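                      :          ! Note: a given rank may belong to only one of the two distributions; ranks
                      :          ! without an associated source (destination) struct simply skip the SRC (DEST)
                      :          ! steps below (a caller-side sketch follows the cleanup routine at the end of
                      :          ! this module).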
    1047             : 
    1048             :          ! DEST_1
    1049        9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1050             :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1051        9100 :                                   col_indices=recv_col_indices)
    1052        9100 :             info%recv_col_indices => recv_col_indices
    1053        9100 :             info%recv_row_indices => recv_row_indices
    1054        9100 :             nrow_block_src = src_block(1)
    1055        9100 :             ncol_block_src = src_block(2)
    1056       54600 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1057             : 
    1058             :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1059        9100 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1060        9100 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1061        9100 :             info%nlocal_recv(1) = nrow_local_recv
    1062        9100 :             info%nlocal_recv(2) = ncol_local_recv
    1063             :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1064       45500 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1065       98140 :             DO i = 1, nrow_local_recv
    1066             :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1067             :                ! then work out which row block it comes from, and which process row that row block comes from.
    1068       98140 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1069             :             END DO
    1070      187180 :             DO j = 1, ncol_local_recv
    1071             :                ! Similarly for the columns
    1072      187180 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1073             :             END DO
    1074             :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1075             : 
    1076       18200 :             DO q = 0, src_num_pe(2) - 1
    1077      187180 :                ncols = COUNT(src_q .EQ. q)
    1078       27300 :                DO p = 0, src_num_pe(1) - 1
    1079       98140 :                   nrows = COUNT(src_p .EQ. p)
    1080             :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1081       18200 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1082             :                END DO
    1083             :             END DO
    1084        9100 :             DEALLOCATE (src_p, src_q)
    1085             : 
    1086             :             ! Use one long buffer (and displacements into that buffer)
    1087             :             !     this prevents the need for a rectangular array where not all elements will be populated
    1088       36400 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1089        9100 :             info%recv_disp(0) = 0
    1090        9100 :             DO i = 1, send_size - 1
    1091        9100 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1092             :             END DO
    1093             : 
    1094             :             ! Issue receive calls on ranks which expect data
    1095       18200 :             DO k = 0, send_size - 1
    1096       18200 :                IF (recv_count(k) .GT. 0) THEN
    1097             :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1098        9100 :                                       source2global(k), info%recv_request(k))
    1099             :                ELSE
    1100           0 :                   info%recv_request(k) = mp_request_null
    1101             :                END IF
    1102             :             END DO
    1103        9100 :             DEALLOCATE (source2global)
    1104             :          END IF ! ASSOCIATED(destination)
    1105             : 
    1106             :          ! SRC_1
    1107        9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1108             :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1109        4550 :                                   col_indices=send_col_indices)
    1110        4550 :             nrow_block_dest = dest_block(1)
    1111        4550 :             ncol_block_dest = dest_block(2)
    1112       31850 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1113             : 
    1114             :             ! Determine the send counts, allocate the send buffers
    1115        4550 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1116        4550 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1117             : 
    1118             :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1119             :             !   i.e. number of rows,cols in the sending distribution
    1120       22750 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1121             : 
    1122       93590 :             DO i = 1, nrow_local_send
    1123             :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1124       93590 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1125             :             END DO
    1126       93590 :             DO j = 1, ncol_local_send
    1127       93590 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1128             :             END DO
    1129             :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1130             : 
    1131        9100 :             DO q = 0, dest_num_pe(2) - 1
    1132       93590 :                ncols = COUNT(dest_q .EQ. q)
    1133       18200 :                DO p = 0, dest_num_pe(1) - 1
    1134      187180 :                   nrows = COUNT(dest_p .EQ. p)
    1135       13650 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1136             :                END DO
    1137             :             END DO
    1138        4550 :             DEALLOCATE (dest_p, dest_q)
    1139             : 
    1140             :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1141       22750 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1142        4550 :             send_disp(0) = 0
    1143        9100 :             DO k = 1, recv_size - 1
    1144        9100 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1145             :             END DO
    1146             : 
     1147             :             ! Loop over the source matrix, pack the send buffers
    1148       13650 :             send_count(:) = 0
    1149       93590 :             DO j = 1, ncol_local_send
     1150             :                ! Use send_col_indices and send_row_indices here, as we are looking up the global row/column numbers of the local rows/columns we send.
    1151       89040 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1152     2069270 :                DO i = 1, nrow_local_send
    1153     1975680 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1154     1975680 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1155     1975680 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1156     2064720 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1157             :                END DO
    1158             :             END DO
    1159             : 
    1160             :             ! For each non-zero send_count, call mpi_isend
    1161       13650 :             DO k = 0, recv_size - 1
    1162       13650 :                IF (send_count(k) .GT. 0) THEN
    1163             :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1164        9100 :                                       dest2global(k), info%send_request(k))
    1165             :                ELSE
    1166           0 :                   info%send_request(k) = mp_request_null
    1167             :                END IF
    1168             :             END DO
    1169        4550 :             DEALLOCATE (send_count, send_disp, dest2global)
    1170             :          END IF ! ASSOCIATED(source)
    1171        9100 :          DEALLOCATE (dest_blacs2mpi)
    1172             : 
    1173             :       END IF !IF (.NOT. cp2k_is_parallel)
    1174             : 
    1175        9100 :       CALL timestop(handle)
    1176             : 
    1177       36400 :    END SUBROUTINE cp_cfm_start_copy_general
    1178             : 
    1179             : ! **************************************************************************************************
    1180             : !> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
    1181             : !> \param destination  output cfm matrix
    1182             : !> \param info         all of the data that will be needed to complete the copy operation
    1183             : !> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
    1184             : !>       allocatable arrays instead of pointers wherever possible.
    1185             : ! **************************************************************************************************
    1186        9100 :    SUBROUTINE cp_cfm_finish_copy_general(destination, info)
    1187             :       TYPE(cp_cfm_type), INTENT(IN)                      :: destination
    1188             :       TYPE(copy_cfm_info_type), INTENT(inout)            :: info
    1189             : 
    1190             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_finish_copy_general'
    1191             : 
    1192             :       INTEGER                                            :: handle, i, j, k, mpi_rank, ni, nj, &
    1193             :                                                             src_q_j
    1194        9100 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count, src_p_i
    1195        9100 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1196             : 
    1197        9100 :       CALL timeset(routineN, handle)
    1198             : 
    1199             :       IF (.NOT. cp2k_is_parallel) THEN
    1200             :          ! Now unpack the data from the 'send buffer'
    1201             :          k = 0
    1202             :          DO j = 1, SIZE(destination%local_data, 2)
    1203             :             DO i = 1, SIZE(destination%local_data, 1)
    1204             :                k = k + 1
    1205             :                destination%local_data(i, j) = info%send_buf(k)
    1206             :             END DO
    1207             :          END DO
    1208             :          DEALLOCATE (info%send_buf)
    1209             :       ELSE
    1210             :          ! Set up local variables ...
    1211        9100 :          recv_col_indices => info%recv_col_indices
    1212        9100 :          recv_row_indices => info%recv_row_indices
    1213             : 
    1214             :          ! ... use the local variables to do the work
    1215             :          ! DEST_2
    1216        9100 :          CALL mp_waitall(info%recv_request(:))
    1217             : 
    1218        9100 :          nj = info%nlocal_recv(2)
    1219        9100 :          ni = info%nlocal_recv(1)
    1220       45500 :          ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
    1221             :          ! Loop over the rmat, filling it in with data from the recv buffers
     1222             :          ! Loop over the destination matrix, filling it in with data from the recv buffers
    1223       18200 :          recv_count(:) = 0
    1224       98140 :          DO i = 1, ni
    1225       98140 :             src_p_i(i) = MOD(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
    1226             :          END DO
    1227             : 
    1228      187180 :          DO j = 1, nj
    1229      178080 :             src_q_j = MOD(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
    1230     2162860 :             DO i = 1, ni
    1231     1975680 :                mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
    1232     1975680 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    1233     2153760 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    1234             :             END DO
    1235             :          END DO
    1236             : 
    1237        9100 :          DEALLOCATE (recv_count, src_p_i)
    1238             :          ! Invalidate the stored state
    1239        9100 :          NULLIFY (info%recv_col_indices, info%recv_row_indices)
    1240        9100 :          DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    1241             :       END IF
    1242             : 
    1243        9100 :       CALL timestop(handle)
    1244             : 
    1245        9100 :    END SUBROUTINE cp_cfm_finish_copy_general
    1246             : 
    1247             : ! **************************************************************************************************
     1248             : !> \brief Complete the copy operation: wait for comms, clean up MPI state.
    1249             : !> \param info    all of the data that will be needed to complete the copy operation
    1250             : !> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
    1251             : !>       allocatable arrays instead of pointers wherever possible.
    1252             : ! **************************************************************************************************
    1253        4550 :    SUBROUTINE cp_cfm_cleanup_copy_general(info)
    1254             :       TYPE(copy_cfm_info_type), INTENT(inout)            :: info
    1255             : 
    1256             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_cleanup_copy_general'
    1257             : 
    1258             :       INTEGER                                            :: handle
    1259             : 
    1260        4550 :       CALL timeset(routineN, handle)
    1261             : 
    1262             :       IF (cp2k_is_parallel) THEN
    1263             :          ! SRC_2
     1264             :          ! If this process is also in the destination decomposition, this deallocation
     1265             :          ! was already done in cp_cfm_finish_copy_general
    1266        4550 :          IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
    1267        4550 :          CALL mp_waitall(info%send_request(:))
    1268        4550 :          DEALLOCATE (info%send_request, info%send_buf)
    1269             :       END IF
    1270             : 
    1271        4550 :       CALL timestop(handle)
    1272        4550 :    END SUBROUTINE cp_cfm_cleanup_copy_general
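                      : 
                      : ! Caller-side sketch of the redistribution protocol (illustrative only; src, dest
                      : ! and para_env are hypothetical objects set up elsewhere). Every rank of the global
                      : ! para_env calls the start routine; ranks holding part of the destination complete
                      : ! the receives, and ranks holding part of the source complete the sends:
                      : !
                      : !    TYPE(copy_cfm_info_type) :: info
                      : !    CALL cp_cfm_start_copy_general(src, dest, para_env, info)
                      : !    IF (ASSOCIATED(dest%matrix_struct)) CALL cp_cfm_finish_copy_general(dest, info)
                      : !    IF (ASSOCIATED(src%matrix_struct)) CALL cp_cfm_cleanup_copy_general(info)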
    1273           0 : END MODULE cp_cfm_types

Generated by: LCOV version 1.15