LCOV - code coverage report
Current view: top level - src/fm - cp_fm_types.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:4c33f95) Lines: 704 824 85.4 %
Date: 2025-01-30 06:53:08 Functions: 36 45 80.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief represent a full matrix distributed on many processors
      10             : !> \par History
      11             : !>      3) separated structure object, removed globenv, renamed to full matrix
      12             : !>         many changes (fawzi 08.2002)
      13             : !> \author Matthias Krack (22.05.2001)
      14             : ! **************************************************************************************************
      15             : MODULE cp_fm_types
      16             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17             :    USE cp_blacs_types,                  ONLY: cp_blacs_type
      18             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_equivalent,&
      19             :                                               cp_fm_struct_get,&
      20             :                                               cp_fm_struct_release,&
      21             :                                               cp_fm_struct_retain,&
      22             :                                               cp_fm_struct_type,&
      23             :                                               cp_fm_struct_write_info
      24             :    USE kinds,                           ONLY: dp,&
      25             :                                               sp
      26             :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      27             :                                               mp_any_source,&
      28             :                                               mp_para_env_type,&
      29             :                                               mp_proc_null,&
      30             :                                               mp_request_null,&
      31             :                                               mp_request_type,&
      32             :                                               mp_waitall
      33             :    USE parallel_rng_types,              ONLY: UNIFORM,&
      34             :                                               rng_stream_type
      35             : #include "../base/base_uses.f90"
      36             : 
      37             :    IMPLICIT NONE
      38             : 
      39             :    PRIVATE
      40             : 
      41             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
      42             :    LOGICAL, PARAMETER          :: debug_this_module = .TRUE.
      43             :    INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      44             : 
      45             :    INTEGER, PRIVATE :: cp_fm_mm_type = 1
      46             : 
      47             :    PUBLIC :: cp_fm_type, &
      48             :              cp_fm_p_type, copy_info_type
      49             : 
      50             :    PUBLIC :: cp_fm_add_to_element, &
      51             :              cp_fm_create, &
      52             :              cp_fm_release, &
      53             :              cp_fm_get_info, &
      54             :              cp_fm_set_element, &
      55             :              cp_fm_get_element, &
      56             :              cp_fm_get_diag, & ! get diagonal
      57             :              cp_fm_set_all, & ! set all elements and diagonal
      58             :              cp_fm_set_submatrix, & ! set a submatrix to given values
      59             :              cp_fm_get_submatrix, & ! get a submatrix of given values
      60             :              cp_fm_init_random, &
      61             :              cp_fm_maxabsval, & ! find the maximum absolute value
      62             :              cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
      63             :              cp_fm_to_fm, & ! copy (parts of) a fm to a fm
      64             :              cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
      65             :              cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
      66             :              cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
      67             :              cp_fm_to_fm_triangular, &
      68             :              cp_fm_copy_general, &
      69             :              cp_fm_start_copy_general, &
      70             :              cp_fm_finish_copy_general, &
      71             :              cp_fm_cleanup_copy_general, &
      72             :              cp_fm_write_unformatted, & ! writes a full matrix to an open unit
      73             :              cp_fm_write_formatted, & ! writes a full matrix to an open unit
      74             :              cp_fm_read_unformatted, & ! reads a full matrix from an open unit
      75             :              cp_fm_setup, & ! allows to set flags for fms
      76             :              cp_fm_get_mm_type, &
      77             :              cp_fm_write_info, &
      78             :              cp_fm_to_fm_submat_general ! copy matrix across different contexts
      79             : 
      80             :    PUBLIC :: cp_fm_pilaenv
      81             : 
      82             :    INTERFACE cp_fm_to_fm
      83             :       MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
      84             :          cp_fm_to_fm_columns ! just a number of columns
      85             :    END INTERFACE
      86             : 
      87             :    INTERFACE cp_fm_release
      88             :       MODULE PROCEDURE cp_fm_release_aa0, &
      89             :          cp_fm_release_aa1, &
      90             :          cp_fm_release_aa2, &
      91             :          cp_fm_release_aa3, &
      92             :          cp_fm_release_ap1, &
      93             :          cp_fm_release_ap2, &
      94             :          cp_fm_release_pa1, &
      95             :          cp_fm_release_pa2, &
      96             :          cp_fm_release_pa3, &
      97             :          cp_fm_release_pp1, &
      98             :          cp_fm_release_pp2
      99             :    END INTERFACE
     100             : 
     101             : ! **************************************************************************************************
     102             : !> \brief represent a full matrix
     103             : !> \param name the name of the matrix, used for printing
     104             : !> \param matrix_struct structure of this matrix
     105             : !> \param local_data array with the data of the matrix (its contents
     106             : !>        depend on the matrix type used: in parallel runs it will be
     107             : !>        in scalapack format, in sequential, it will simply contain
     108             : !>        the matrix)
     109             : !> \par History
     110             : !>      08.2002 created [fawzi]
     111             : !> \author fawzi
     112             : ! **************************************************************************************************
     113             :    TYPE cp_fm_type
     114             : !    PRIVATE
     115             :       CHARACTER(LEN=60) :: name = ""
     116             :       LOGICAL :: use_sp = .FALSE.
     117             :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
     118             :       REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
     119             :       REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => NULL()
     120             :    END TYPE cp_fm_type
     121             : 
     122             : ! **************************************************************************************************
     123             : !> \brief just to build arrays of pointers to matrices
     124             : !> \param matrix the pointer to the matrix
     125             : !> \par History
     126             : !>      08.2002 created [fawzi]
     127             : !> \author fawzi
     128             : ! **************************************************************************************************
     129             :    TYPE cp_fm_p_type
     130             :       TYPE(cp_fm_type), POINTER :: matrix => NULL()
     131             :    END TYPE cp_fm_p_type
     132             : 
     133             : ! **************************************************************************************************
     134             : !> \brief Stores the state of a copy between cp_fm_start_copy_general
     135             : !>        and cp_fm_finish_copy_general
     136             : !> \par History
     137             : !>      Jan 2017  [Mark T]
     138             : ! **************************************************************************************************
     139             :    TYPE copy_info_type
     140             :       INTEGER :: send_size = -1
     141             :       INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row  2->col
     142             :       TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
     143             :       INTEGER, DIMENSION(:), ALLOCATABLE   :: recv_disp
     144             :       INTEGER, DIMENSION(:), POINTER       :: recv_col_indices => NULL(), recv_row_indices => NULL()
     145             :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
     146             :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
     147             :    END TYPE copy_info_type
     148             : 
     149             : CONTAINS
     150             : 
     151             : ! **************************************************************************************************
     152             : !> \brief creates a new full matrix with the given structure
     153             : !> \param matrix the matrix to be created
     154             : !> \param matrix_struct the structure of matrix
     155             : !> \param name ...
     156             : !> \param use_sp ...
     157             : !> \par History
     158             : !>      08.2002 created [fawzi]
     159             : !> \author Fawzi Mohamed
     160             : !> \note
     161             : !>      preferred allocation routine
     162             : ! **************************************************************************************************
     163     1275188 :    SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
     164             :       TYPE(cp_fm_type), INTENT(OUT)                      :: matrix
     165             :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     166             :       CHARACTER(LEN=*), INTENT(in), OPTIONAL             :: name
     167             :       LOGICAL, INTENT(in), OPTIONAL                      :: use_sp
     168             : 
     169             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_create'
     170             : 
     171             :       INTEGER                                            :: handle, ncol_local, npcol, nprow, &
     172             :                                                             nrow_local
     173             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     174             : 
     175     1275188 :       CALL timeset(routineN, handle)
     176             : 
     177     1275188 :       context => matrix_struct%context
     178     1275188 :       matrix%matrix_struct => matrix_struct
     179     1275188 :       CALL cp_fm_struct_retain(matrix%matrix_struct)
     180             : 
     181     1275188 :       matrix%use_sp = .FALSE.
     182     1275188 :       IF (PRESENT(use_sp)) matrix%use_sp = use_sp
     183             : 
     184     1275188 :       nprow = context%num_pe(1)
     185     1275188 :       npcol = context%num_pe(2)
     186     1275188 :       NULLIFY (matrix%local_data)
     187     1275188 :       NULLIFY (matrix%local_data_sp)
     188             : 
     189             :       ! OK, we allocate here at least a 1 x 1 matrix
     190             :       ! this must (and is) compatible with the descinit call
     191             :       ! in cp_fm_struct
     192     1275188 :       nrow_local = matrix_struct%local_leading_dimension
     193     1275188 :       ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     194     1275188 :       IF (matrix%use_sp) THEN
     195           0 :          ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
     196             :       ELSE
     197     5100752 :          ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     198             :       END IF
     199             : 
     200             :       ! JVDV we should remove this, as it is up to the user to zero afterwards
     201     1275188 :       IF (matrix%use_sp) THEN
     202           0 :          matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
     203             :       ELSE
     204   615356432 :          matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
     205             :       END IF
     206             : 
     207     1275188 :       IF (PRESENT(name)) THEN
     208      630087 :          matrix%name = name
     209             :       ELSE
     210      645101 :          matrix%name = 'full matrix'
     211             :       END IF
     212             : 
     213     1275188 :       CALL timestop(handle)
     214             : 
     215     1275188 :    END SUBROUTINE cp_fm_create
     216             : 
     217             : ! **************************************************************************************************
     218             : !> \brief releases a full matrix
     219             : !> \param matrix the matrix to release
     220             : !> \par History
     221             : !>      08.2002 created [fawzi]
     222             : !> \author Fawzi Mohamed
     223             : ! **************************************************************************************************
     224     1287100 :    SUBROUTINE cp_fm_release_aa0(matrix)
     225             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: matrix
     226             : 
     227     1287100 :       IF (ASSOCIATED(matrix%local_data)) THEN
     228     1274488 :          DEALLOCATE (matrix%local_data)
     229             :          NULLIFY (matrix%local_data)
     230             :       END IF
     231     1287100 :       IF (ASSOCIATED(matrix%local_data_sp)) THEN
     232           0 :          DEALLOCATE (matrix%local_data_sp)
     233             :          NULLIFY (matrix%local_data_sp)
     234             :       END IF
     235     1287100 :       matrix%name = ""
     236     1287100 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     237             : 
     238     1287100 :    END SUBROUTINE cp_fm_release_aa0
     239             : 
     240             : ! **************************************************************************************************
     241             : !> \brief ...
     242             : !> \param matrices ...
     243             : ! **************************************************************************************************
     244       71770 :    SUBROUTINE cp_fm_release_aa1(matrices)
     245             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: matrices
     246             : 
     247             :       INTEGER                                            :: i
     248             : 
     249       71770 :       IF (ALLOCATED(matrices)) THEN
     250      194571 :          DO i = 1, SIZE(matrices)
     251      194571 :             CALL cp_fm_release(matrices(i))
     252             :          END DO
     253       70682 :          DEALLOCATE (matrices)
     254             :       END IF
     255       71770 :    END SUBROUTINE cp_fm_release_aa1
     256             : 
     257             : ! **************************************************************************************************
     258             : !> \brief ...
     259             : !> \param matrices ...
     260             : ! **************************************************************************************************
     261        7864 :    SUBROUTINE cp_fm_release_aa2(matrices)
     262             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: matrices
     263             : 
     264             :       INTEGER                                            :: i, j
     265             : 
     266        7864 :       IF (ALLOCATED(matrices)) THEN
     267       19016 :          DO i = 1, SIZE(matrices, 1)
     268       51362 :             DO j = 1, SIZE(matrices, 2)
     269       45774 :                CALL cp_fm_release(matrices(i, j))
     270             :             END DO
     271             :          END DO
     272        5588 :          DEALLOCATE (matrices)
     273             :       END IF
     274        7864 :    END SUBROUTINE cp_fm_release_aa2
     275             : 
     276             : ! **************************************************************************************************
     277             : !> \brief ...
     278             : !> \param matrices ...
     279             : ! **************************************************************************************************
     280          42 :    SUBROUTINE cp_fm_release_aa3(matrices)
     281             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: matrices
     282             : 
     283             :       INTEGER                                            :: i, j, k
     284             : 
     285          42 :       IF (ALLOCATED(matrices)) THEN
     286         454 :          DO i = 1, SIZE(matrices, 1)
     287        1306 :             DO j = 1, SIZE(matrices, 2)
     288        2276 :                DO k = 1, SIZE(matrices, 3)
     289        1864 :                   CALL cp_fm_release(matrices(i, j, k))
     290             :                END DO
     291             :             END DO
     292             :          END DO
     293          42 :          DEALLOCATE (matrices)
     294             :       END IF
     295          42 :    END SUBROUTINE cp_fm_release_aa3
     296             : 
     297             : ! **************************************************************************************************
     298             : !> \brief ...
     299             : !> \param matrices ...
     300             : ! **************************************************************************************************
     301      149932 :    SUBROUTINE cp_fm_release_pa1(matrices)
     302             :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: matrices
     303             : 
     304             :       INTEGER                                            :: i
     305             : 
     306      149932 :       IF (ASSOCIATED(matrices)) THEN
     307      119347 :          DO i = 1, SIZE(matrices)
     308      119347 :             CALL cp_fm_release(matrices(i))
     309             :          END DO
     310       51188 :          DEALLOCATE (matrices)
     311             :          NULLIFY (matrices)
     312             :       END IF
     313      149932 :    END SUBROUTINE cp_fm_release_pa1
     314             : 
     315             : ! **************************************************************************************************
     316             : !> \brief ...
     317             : !> \param matrices ...
     318             : ! **************************************************************************************************
     319       21264 :    SUBROUTINE cp_fm_release_pa2(matrices)
     320             :       TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: matrices
     321             : 
     322             :       INTEGER                                            :: i, j
     323             : 
     324       21264 :       IF (ASSOCIATED(matrices)) THEN
     325       44666 :          DO i = 1, SIZE(matrices, 1)
     326       87778 :             DO j = 1, SIZE(matrices, 2)
     327       76750 :                CALL cp_fm_release(matrices(i, j))
     328             :             END DO
     329             :          END DO
     330       11028 :          DEALLOCATE (matrices)
     331             :          NULLIFY (matrices)
     332             :       END IF
     333       21264 :    END SUBROUTINE cp_fm_release_pa2
     334             : 
     335             : ! **************************************************************************************************
     336             : !> \brief ...
     337             : !> \param matrices ...
     338             : ! **************************************************************************************************
     339           0 :    SUBROUTINE cp_fm_release_pa3(matrices)
     340             :       TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER      :: matrices
     341             : 
     342             :       INTEGER                                            :: i, j, k
     343             : 
     344           0 :       IF (ASSOCIATED(matrices)) THEN
     345           0 :          DO i = 1, SIZE(matrices, 1)
     346           0 :             DO j = 1, SIZE(matrices, 2)
     347           0 :                DO k = 1, SIZE(matrices, 3)
     348           0 :                   CALL cp_fm_release(matrices(i, j, k))
     349             :                END DO
     350             :             END DO
     351             :          END DO
     352           0 :          DEALLOCATE (matrices)
     353             :          NULLIFY (matrices)
     354             :       END IF
     355           0 :    END SUBROUTINE cp_fm_release_pa3
     356             : 
     357             : ! **************************************************************************************************
     358             : !> \brief ...
     359             : !> \param matrices ...
     360             : ! **************************************************************************************************
     361           0 :    SUBROUTINE cp_fm_release_ap1(matrices)
     362             :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:)      :: matrices
     363             : 
     364             :       INTEGER                                            :: i
     365             : 
     366           0 :       IF (ALLOCATED(matrices)) THEN
     367           0 :          DO i = 1, SIZE(matrices)
     368           0 :             CALL cp_fm_release(matrices(i)%matrix)
     369           0 :             DEALLOCATE (matrices(i)%matrix)
     370             :          END DO
     371           0 :          DEALLOCATE (matrices)
     372             :       END IF
     373           0 :    END SUBROUTINE cp_fm_release_ap1
     374             : 
     375             : ! **************************************************************************************************
     376             : !> \brief ...
     377             : !> \param matrices ...
     378             : ! **************************************************************************************************
     379           0 :    SUBROUTINE cp_fm_release_ap2(matrices)
     380             :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :)   :: matrices
     381             : 
     382             :       INTEGER                                            :: i, j
     383             : 
     384           0 :       IF (ALLOCATED(matrices)) THEN
     385           0 :          DO i = 1, SIZE(matrices, 1)
     386           0 :             DO j = 1, SIZE(matrices, 2)
     387           0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     388           0 :                DEALLOCATE (matrices(i, j)%matrix)
     389             :             END DO
     390             :          END DO
     391           0 :          DEALLOCATE (matrices)
     392             :       END IF
     393           0 :    END SUBROUTINE cp_fm_release_ap2
     394             : 
     395             : ! **************************************************************************************************
     396             : !> \brief ...
     397             : !> \param matrices ...
     398             : ! **************************************************************************************************
     399           0 :    SUBROUTINE cp_fm_release_pp1(matrices)
     400             :       TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: matrices
     401             : 
     402             :       INTEGER                                            :: i
     403             : 
     404           0 :       IF (ASSOCIATED(matrices)) THEN
     405           0 :          DO i = 1, SIZE(matrices)
     406           0 :             CALL cp_fm_release(matrices(i)%matrix)
     407           0 :             DEALLOCATE (matrices(i)%matrix)
     408             :          END DO
     409           0 :          DEALLOCATE (matrices)
     410             :          NULLIFY (matrices)
     411             :       END IF
     412           0 :    END SUBROUTINE cp_fm_release_pp1
     413             : 
     414             : ! **************************************************************************************************
     415             : !> \brief ...
     416             : !> \param matrices ...
     417             : ! **************************************************************************************************
     418           0 :    SUBROUTINE cp_fm_release_pp2(matrices)
     419             :       TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER       :: matrices
     420             : 
     421             :       INTEGER                                            :: i, j
     422             : 
     423           0 :       IF (ASSOCIATED(matrices)) THEN
     424           0 :          DO i = 1, SIZE(matrices, 1)
     425           0 :             DO j = 1, SIZE(matrices, 2)
     426           0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     427           0 :                DEALLOCATE (matrices(i, j)%matrix)
     428             :             END DO
     429             :          END DO
     430           0 :          DEALLOCATE (matrices)
     431             :          NULLIFY (matrices)
     432             :       END IF
     433           0 :    END SUBROUTINE cp_fm_release_pp2
     434             : 
     435             : ! **************************************************************************************************
     436             : !> \brief fills a matrix with random numbers
     437             : !> \param matrix : to be initialized
     438             : !> \param ncol : numbers of cols to fill
     439             : !> \param start_col : starting at coll number
     440             : !> \author Joost VandeVondele
     441             : !> \note
     442             : !>      the value of a_ij is independent of the number of cpus
     443             : ! **************************************************************************************************
     444        2622 :    SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
     445             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     446             :       INTEGER, INTENT(IN), OPTIONAL                      :: ncol, start_col
     447             : 
     448             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_init_random'
     449             : 
     450             :       INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
     451             :          ncol_local, nrow_global, nrow_local
     452        5244 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     453        2622 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: buff
     454             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     455        2622 :          POINTER                                         :: local_data
     456             :       REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
     457             :          seed = RESHAPE((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
     458             :       TYPE(rng_stream_type)                              :: rng
     459             : 
     460        2622 :       CALL timeset(routineN, handle)
     461             : 
     462             :       ! guarantee same seed over all tasks
     463        2622 :       CALL matrix%matrix_struct%para_env%bcast(seed, 0)
     464             : 
     465             :       rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
     466        2622 :                             extended_precision=.TRUE., seed=seed)
     467             : 
     468        2622 :       CPASSERT(.NOT. matrix%use_sp)
     469             : 
     470             :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
     471             :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     472             :                           local_data=local_data, &
     473        2622 :                           row_indices=row_indices, col_indices=col_indices)
     474             : 
     475        2622 :       my_start_col = 1
     476        2622 :       IF (PRESENT(start_col)) my_start_col = start_col
     477        2622 :       my_ncol = matrix%matrix_struct%ncol_global
     478        2622 :       IF (PRESENT(ncol)) my_ncol = ncol
     479             : 
     480        2622 :       IF (ncol_global < (my_start_col + my_ncol - 1)) &
     481           0 :          CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
     482             : 
     483        7866 :       ALLOCATE (buff(nrow_global))
     484             : 
     485             :       ! each global row has its own substream, in order to reach the stream for the local col,
     486             :       ! we just reset to the next substream
     487             :       ! following this, we fill the full buff with random numbers, and pick those we need
     488        2622 :       icol_global = 0
     489       21429 :       DO icol_local = 1, ncol_local
     490       18807 :          CPASSERT(col_indices(icol_local) > icol_global)
     491             :          DO
     492       18807 :             CALL rng%reset_to_next_substream()
     493       18807 :             icol_global = icol_global + 1
     494       18807 :             IF (icol_global == col_indices(icol_local)) EXIT
     495             :          END DO
     496       18807 :          CALL rng%fill(buff)
     497      785427 :          DO irow_local = 1, nrow_local
     498      782805 :             local_data(irow_local, icol_local) = buff(row_indices(irow_local))
     499             :          END DO
     500             :       END DO
     501             : 
     502        2622 :       DEALLOCATE (buff)
     503             : 
     504             :       ! store seed before deletion (unclear if this is the proper seed)
     505             : 
     506             :       ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
     507             :       ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
     508             :       ! stream of random numbers. While this seems odd and contrary to the original design,
     509             :       ! it was probably introduced to improve reproducibility.
     510             :       ! See also https://github.com/cp2k/cp2k/pull/506
     511        2622 :       CALL rng%get(ig=seed)
     512             : 
     513        2622 :       CALL timestop(handle)
     514             : 
     515       73416 :    END SUBROUTINE cp_fm_init_random
     516             : 
     517             : ! **************************************************************************************************
     518             : !> \brief set all elements of a matrix to the same value,
     519             : !>      and optionally the diagonal to a different one
     520             : !> \param matrix input matrix
     521             : !> \param alpha scalar used to set all elements of the matrix
     522             : !> \param beta scalar used to set diagonal of the matrix
     523             : !> \note
     524             : !>      can be used to zero a matrix
     525             : !>      can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
     526             : ! **************************************************************************************************
     527      334243 :    SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
     528             : 
     529             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     530             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     531             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     532             : 
     533             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_set_all'
     534             : 
     535             :       INTEGER                                            :: handle, i, n
     536             : 
     537      334243 :       CALL timeset(routineN, handle)
     538             : 
     539      334243 :       IF (matrix%use_sp) THEN
     540           0 :          matrix%local_data_sp(:, :) = REAL(alpha, sp)
     541             :       ELSE
     542   206631218 :          matrix%local_data(:, :) = alpha
     543             :       END IF
     544             : 
     545      334243 :       IF (PRESENT(beta)) THEN
     546       55192 :          CPASSERT(.NOT. matrix%use_sp)
     547       55192 :          n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     548      513760 :          DO i = 1, n
     549      513760 :             CALL cp_fm_set_element(matrix, i, i, beta)
     550             :          END DO
     551             :       END IF
     552             : 
     553      334243 :       CALL timestop(handle)
     554             : 
     555      334243 :    END SUBROUTINE cp_fm_set_all
     556             : 
     557             : ! **************************************************************************************************
     558             : !> \brief returns the diagonal elements of a fm
     559             : !> \param matrix ...
     560             : !> \param diag ...
     561             : ! **************************************************************************************************
     562       16520 :    SUBROUTINE cp_fm_get_diag(matrix, diag)
     563             : 
     564             :       ! arguments
     565             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
     566             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
     567             : 
     568             :       ! locals
     569             :       INTEGER :: i, nrow_global
     570             : 
     571             : #if defined(__parallel)
     572             :       INTEGER, DIMENSION(9) :: desca
     573             :       TYPE(cp_blacs_env_type), POINTER :: context
     574             :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     575             :                  nprow
     576       16520 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     577       16520 :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
     578             : #endif
     579             : 
     580       16520 :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
     581             : 
     582             : #if defined(__parallel)
     583      202226 :       diag = 0.0_dp
     584       16520 :       context => matrix%matrix_struct%context
     585       16520 :       myprow = context%mepos(1)
     586       16520 :       mypcol = context%mepos(2)
     587       16520 :       nprow = context%num_pe(1)
     588       16520 :       npcol = context%num_pe(2)
     589             : 
     590       16520 :       a => matrix%local_data
     591       16520 :       a_sp => matrix%local_data_sp
     592      165200 :       desca(:) = matrix%matrix_struct%descriptor(:)
     593             : 
     594      202226 :       DO i = 1, nrow_global
     595             :          CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
     596      185706 :                       irow_local, icol_local, iprow, ipcol)
     597      202226 :          IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     598       92937 :             IF (matrix%use_sp) THEN
     599           0 :                diag(i) = REAL(a_sp(irow_local, icol_local), dp)
     600             :             ELSE
     601       92937 :                diag(i) = a(irow_local, icol_local)
     602             :             END IF
     603             :          END IF
     604             :       END DO
     605             : #else
     606             :       DO i = 1, nrow_global
     607             :          IF (matrix%use_sp) THEN
     608             :             diag(i) = REAL(matrix%local_data_sp(i, i), dp)
     609             :          ELSE
     610             :             diag(i) = matrix%local_data(i, i)
     611             :          END IF
     612             :       END DO
     613             : #endif
     614      387932 :       CALL matrix%matrix_struct%para_env%sum(diag)
     615             : 
     616       16520 :    END SUBROUTINE cp_fm_get_diag
     617             : 
     618             : ! **************************************************************************************************
     619             : !> \brief returns an element of a fm
     620             : !>      this value is valid on every cpu
     621             : !>      using this call is expensive
     622             : !> \param matrix the matrix to read
     623             : !> \param irow_global the row
     624             : !> \param icol_global the col
     625             : !> \param alpha the value of matrix(irow_global, icol_global)
     626             : !> \param local true if the element is on this cpu, false otherwise
     627             : !> \note
     628             : !>      - modified semantics. now this function always returns the value
     629             : !>        previously the value was zero on cpus that didn't own the relevant
     630             : !>        part of the matrix (Joost VandeVondele, May 2003)
     631             : !>      - usage of the function should be avoided, as it is likely to rather slow
     632             : !>        using row_indices/col_indices/local_data + some smart scheme normally
     633             : !>        yields a real parallel code
     634             : ! **************************************************************************************************
     635     2465852 :    SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
     636             : 
     637             :       ! arguments
     638             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     639             :       REAL(KIND=dp), INTENT(OUT)                     :: alpha
     640             :       INTEGER, INTENT(IN)                       :: icol_global, &
     641             :                                                    irow_global
     642             :       LOGICAL, INTENT(OUT), OPTIONAL            :: local
     643             : 
     644             :       ! locals
     645             : #if defined(__parallel)
     646             :       INTEGER, DIMENSION(9) :: desca
     647             :       TYPE(cp_blacs_env_type), POINTER :: context
     648             :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     649             :                  nprow
     650     2465852 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     651             : #endif
     652             : 
     653             : #if defined(__parallel)
     654     2465852 :       context => matrix%matrix_struct%context
     655     2465852 :       myprow = context%mepos(1)
     656     2465852 :       mypcol = context%mepos(2)
     657     2465852 :       nprow = context%num_pe(1)
     658     2465852 :       npcol = context%num_pe(2)
     659             : 
     660     2465852 :       a => matrix%local_data
     661    24658520 :       desca(:) = matrix%matrix_struct%descriptor(:)
     662             : 
     663             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     664     2465852 :                    irow_local, icol_local, iprow, ipcol)
     665             : 
     666     2465852 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     667     1232926 :          alpha = a(irow_local, icol_local)
     668     1232926 :          CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
     669     1232926 :          IF (PRESENT(local)) local = .TRUE.
     670             :       ELSE
     671     1232926 :          CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     672     1232926 :          IF (PRESENT(local)) local = .FALSE.
     673             :       END IF
     674             : 
     675             : #else
     676             :       IF (PRESENT(local)) local = .TRUE.
     677             :       alpha = matrix%local_data(irow_global, icol_global)
     678             : #endif
     679             : 
     680     2465852 :    END SUBROUTINE cp_fm_get_element
     681             : 
     682             : ! **************************************************************************************************
     683             : !> \brief sets an element of a matrix
     684             : !> \param matrix ...
     685             : !> \param irow_global ...
     686             : !> \param icol_global ...
     687             : !> \param alpha ...
     688             : !> \note
     689             : !>      we expect all cpus to have the same arguments in the call to this function
     690             : !>      (otherwise one should use local_data tricks)
     691             : ! **************************************************************************************************
     692      862216 :    SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
     693             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     694             :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
     695             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     696             : 
     697             :       INTEGER                                  :: mypcol, myprow, npcol, nprow
     698             :       TYPE(cp_blacs_env_type), POINTER         :: context
     699             : #if defined(__parallel)
     700             :       INTEGER                                  :: icol_local, ipcol, iprow, &
     701             :                                                   irow_local
     702             :       INTEGER, DIMENSION(9)                    :: desca
     703      862216 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     704             : #endif
     705             : 
     706      862216 :       context => matrix%matrix_struct%context
     707      862216 :       myprow = context%mepos(1)
     708      862216 :       mypcol = context%mepos(2)
     709      862216 :       nprow = context%num_pe(1)
     710      862216 :       npcol = context%num_pe(2)
     711             : 
     712           0 :       CPASSERT(.NOT. matrix%use_sp)
     713             : 
     714             : #if defined(__parallel)
     715             : 
     716      862216 :       a => matrix%local_data
     717             : 
     718     8622160 :       desca(:) = matrix%matrix_struct%descriptor(:)
     719             : 
     720             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     721      862216 :                    irow_local, icol_local, iprow, ipcol)
     722             : 
     723      862216 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     724      435263 :          a(irow_local, icol_local) = alpha
     725             :       END IF
     726             : 
     727             : #else
     728             : 
     729             :       matrix%local_data(irow_global, icol_global) = alpha
     730             : 
     731             : #endif
     732      862216 :    END SUBROUTINE cp_fm_set_element
     733             : 
     734             : ! **************************************************************************************************
     735             : !> \brief sets a submatrix of a full matrix
     736             : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     737             : !>       = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
     738             : !>       * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     739             : !> \param fm the full to change
     740             : !> \param new_values a replicated full matrix with the new values
     741             : !> \param start_row the starting row of b_matrix (defaults to 1)
     742             : !> \param start_col the starting col of b_matrix (defaults to 1)
     743             : !> \param n_rows the number of row to change in b (defaults to
     744             : !>        size(op(new_values),1))
     745             : !> \param n_cols the number of columns to change in b (defaults to
     746             : !>        size(op(new_values),2))
     747             : !> \param alpha rescaling factor for the new values (defaults to 1.0)
     748             : !> \param beta rescaling factor for the old values (defaults to 0.0)
     749             : !> \param transpose if new_values should be transposed: if true
     750             : !>        op(new_values)=new_values^T, else op(new_values)=new_values
     751             : !>        (defaults to false)
     752             : !> \par History
     753             : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     754             : !> \author Fawzi Mohamed
     755             : !> \note
     756             : !>      optimized for full column updates and alpha=1.0, beta=0.0
     757             : !>      the new_values need to be valid on all cpus
     758             : ! **************************************************************************************************
     759       62609 :    SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
     760             :                                   start_col, n_rows, n_cols, alpha, beta, transpose)
     761             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     762             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(in)         :: new_values
     763             :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     764             :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha, beta
     765             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     766             : 
     767             :       INTEGER                                            :: i, i0, j, j0, ncol, ncol_block, &
     768             :                                                             ncol_global, ncol_local, nrow, &
     769             :                                                             nrow_block, nrow_global, nrow_local, &
     770             :                                                             this_col, this_row
     771       62609 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     772             :       LOGICAL                                            :: tr_a
     773             :       REAL(KIND=dp)                                      :: al, be
     774       62609 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     775             : 
     776       62609 :       al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
     777             :       ! can be called too many times, making it a bit useless
     778             :       ! CALL timeset(routineN//','//moduleN,handle)
     779             : 
     780           0 :       CPASSERT(.NOT. fm%use_sp)
     781             : 
     782       62609 :       IF (PRESENT(alpha)) al = alpha
     783       62609 :       IF (PRESENT(beta)) be = beta
     784       62609 :       IF (PRESENT(start_row)) i0 = start_row
     785       62609 :       IF (PRESENT(start_col)) j0 = start_col
     786       62609 :       IF (PRESENT(transpose)) tr_a = transpose
     787       15683 :       IF (tr_a) THEN
     788       15591 :          nrow = SIZE(new_values, 2)
     789       15591 :          ncol = SIZE(new_values, 1)
     790             :       ELSE
     791       47018 :          nrow = SIZE(new_values, 1)
     792       47018 :          ncol = SIZE(new_values, 2)
     793             :       END IF
     794       62609 :       IF (PRESENT(n_rows)) nrow = n_rows
     795       62609 :       IF (PRESENT(n_cols)) ncol = n_cols
     796             : 
     797       62609 :       full_block => fm%local_data
     798             : 
     799             :       CALL cp_fm_get_info(matrix=fm, &
     800             :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     801             :                           nrow_block=nrow_block, ncol_block=ncol_block, &
     802             :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     803       62609 :                           row_indices=row_indices, col_indices=col_indices)
     804             : 
     805       62609 :       IF (al == 1.0 .AND. be == 0.0) THEN
     806     1302958 :          DO j = 1, ncol_local
     807     1249487 :             this_col = col_indices(j) - j0 + 1
     808     1302958 :             IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     809      285169 :                IF (tr_a) THEN
     810        6453 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     811      157620 :                      DO i = 1, nrow_local
     812      157620 :                         full_block(i, j) = new_values(this_col, row_indices(i))
     813             :                      END DO
     814             :                   ELSE
     815         594 :                      DO i = 1, nrow_local
     816         510 :                         this_row = row_indices(i) - i0 + 1
     817         594 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     818         255 :                            full_block(i, j) = new_values(this_col, this_row)
     819             :                         END IF
     820             :                      END DO
     821             :                   END IF
     822             :                ELSE
     823      278716 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     824     8919342 :                      DO i = 1, nrow_local
     825     8919342 :                         full_block(i, j) = new_values(row_indices(i), this_col)
     826             :                      END DO
     827             :                   ELSE
     828        4347 :                      DO i = 1, nrow_local
     829        3745 :                         this_row = row_indices(i) - i0 + 1
     830        4347 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     831         301 :                            full_block(i, j) = new_values(this_row, this_col)
     832             :                         END IF
     833             :                      END DO
     834             :                   END IF
     835             :                END IF
     836             :             END IF
     837             :          END DO
     838             :       ELSE
     839      831608 :          DO j = 1, ncol_local
     840      822470 :             this_col = col_indices(j) - j0 + 1
     841      831608 :             IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     842      822470 :                IF (tr_a) THEN
     843    88223375 :                   DO i = 1, nrow_local
     844    87400905 :                      this_row = row_indices(i) - i0 + 1
     845    88223375 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     846             :                         full_block(i, j) = al*new_values(this_col, this_row) + &
     847      411235 :                                            be*full_block(i, j)
     848             :                      END IF
     849             :                   END DO
     850             :                ELSE
     851           0 :                   DO i = 1, nrow_local
     852           0 :                      this_row = row_indices(i) - i0 + 1
     853           0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     854             :                         full_block(i, j) = al*new_values(this_row, this_col) + &
     855           0 :                                            be*full_block(i, j)
     856             :                      END IF
     857             :                   END DO
     858             :                END IF
     859             :             END IF
     860             :          END DO
     861             :       END IF
     862             : 
     863             :       ! CALL timestop(handle)
     864             : 
     865       62609 :    END SUBROUTINE cp_fm_set_submatrix
     866             : 
     867             : ! **************************************************************************************************
     868             : !> \brief gets a submatrix of a full matrix
     869             : !>       op(target_m)(1:n_rows,1:n_cols)
     870             : !>       =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     871             : !>      target_m is replicated on all cpus
     872             : !>      using this call is expensive
     873             : !> \param fm the full you want to get the info from
     874             : !> \param target_m a replicated full matrix that will contain the result
     875             : !> \param start_row the starting row of b_matrix (defaults to 1)
     876             : !> \param start_col the starting col of b_matrix (defaults to 1)
     877             : !> \param n_rows the number of row to change in b (defaults to
     878             : !>        size(op(new_values),1))
     879             : !> \param n_cols the number of columns to change in b (defaults to
     880             : !>        size(op(new_values),2))
     881             : !> \param transpose if target_m should be transposed: if true
     882             : !>        op(target_m)=target_m^T, else op(target_m)=target_m
     883             : !>        (defaults to false)
     884             : !> \par History
     885             : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     886             : !> \author Fawzi Mohamed
     887             : !> \note
     888             : !>      optimized for full column updates. Zeros out a little too much
     889             : !>      of target_m
     890             : !>      the target_m is replicated and valid on all cpus
     891             : ! **************************************************************************************************
     892       73020 :    SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
     893             :                                   start_col, n_rows, n_cols, transpose)
     894             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     895             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(out)        :: target_m
     896             :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     897             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     898             : 
     899             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
     900             : 
     901             :       INTEGER                                            :: handle, i, i0, j, j0, ncol, ncol_global, &
     902             :                                                             ncol_local, nrow, nrow_global, &
     903             :                                                             nrow_local, this_col, this_row
     904       73020 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     905             :       LOGICAL                                            :: tr_a
     906       73020 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     907             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     908             : 
     909       73020 :       CALL timeset(routineN, handle)
     910             : 
     911       73020 :       i0 = 1; j0 = 1; tr_a = .FALSE.
     912             : 
     913       73020 :       CPASSERT(.NOT. fm%use_sp)
     914             : 
     915       73020 :       IF (PRESENT(start_row)) i0 = start_row
     916       73020 :       IF (PRESENT(start_col)) j0 = start_col
     917       73020 :       IF (PRESENT(transpose)) tr_a = transpose
     918        6240 :       IF (tr_a) THEN
     919        2362 :          nrow = SIZE(target_m, 2)
     920        2362 :          ncol = SIZE(target_m, 1)
     921             :       ELSE
     922       70658 :          nrow = SIZE(target_m, 1)
     923       70658 :          ncol = SIZE(target_m, 2)
     924             :       END IF
     925       73020 :       IF (PRESENT(n_rows)) nrow = n_rows
     926       73020 :       IF (PRESENT(n_cols)) ncol = n_cols
     927             : 
     928       73020 :       para_env => fm%matrix_struct%para_env
     929             : 
     930       73020 :       full_block => fm%local_data
     931             : #if defined(__parallel)
     932             :       ! zero-out whole target_m
     933       73020 :       IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
     934       97288 :          CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
     935             :       END IF
     936             : #endif
     937             : 
     938             :       CALL cp_fm_get_info(matrix=fm, &
     939             :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     940             :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     941       73020 :                           row_indices=row_indices, col_indices=col_indices)
     942             : 
     943      477354 :       DO j = 1, ncol_local
     944      404334 :          this_col = col_indices(j) - j0 + 1
     945      477354 :          IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     946      274010 :             IF (tr_a) THEN
     947        2362 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     948       81863 :                   DO i = 1, nrow_local
     949       81863 :                      target_m(this_col, row_indices(i)) = full_block(i, j)
     950             :                   END DO
     951             :                ELSE
     952           0 :                   DO i = 1, nrow_local
     953           0 :                      this_row = row_indices(i) - i0 + 1
     954           0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     955           0 :                         target_m(this_col, this_row) = full_block(i, j)
     956             :                      END IF
     957             :                   END DO
     958             :                END IF
     959             :             ELSE
     960      271648 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     961     6029368 :                   DO i = 1, nrow_local
     962     6029368 :                      target_m(row_indices(i), this_col) = full_block(i, j)
     963             :                   END DO
     964             :                ELSE
     965     1168114 :                   DO i = 1, nrow_local
     966     1141698 :                      this_row = row_indices(i) - i0 + 1
     967     1168114 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     968       24462 :                         target_m(this_row, this_col) = full_block(i, j)
     969             :                      END IF
     970             :                   END DO
     971             :                END IF
     972             :             END IF
     973             :          END IF
     974             :       END DO
     975             : 
     976    15985988 :       CALL para_env%sum(target_m)
     977             : 
     978       73020 :       CALL timestop(handle)
     979             : 
     980       73020 :    END SUBROUTINE cp_fm_get_submatrix
     981             : 
     982             : ! **************************************************************************************************
     983             : !> \brief returns all kind of information about the full matrix
     984             : !> \param matrix ...
     985             : !> \param name ...
     986             : !> \param nrow_global ...
     987             : !> \param ncol_global ...
     988             : !> \param nrow_block ...
     989             : !> \param ncol_block ...
     990             : !> \param nrow_local ...
     991             : !> \param ncol_local ...
     992             : !> \param row_indices ...
     993             : !> \param col_indices ...
     994             : !> \param local_data ...
     995             : !> \param context ...
     996             : !> \param nrow_locals ...
     997             : !> \param ncol_locals ...
     998             : !> \param matrix_struct ...
     999             : !> \param para_env ...
    1000             : !> \note
    1001             : !>       see also cp_fm_struct for explanation
    1002             : !>       - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
    1003             : !>         access to the local blacs block
    1004             : ! **************************************************************************************************
    1005     5410849 :    SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
    1006             :                              nrow_block, ncol_block, nrow_local, ncol_local, &
    1007             :                              row_indices, col_indices, local_data, context, &
    1008             :                              nrow_locals, ncol_locals, matrix_struct, para_env)
    1009             : 
    1010             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1011             :       CHARACTER(LEN=*), INTENT(OUT), OPTIONAL            :: name
    1012             :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
    1013             :                                                             ncol_block, nrow_local, ncol_local
    1014             :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
    1015             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1016             :          OPTIONAL, POINTER                               :: local_data
    1017             :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
    1018             :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: nrow_locals, ncol_locals
    1019             :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
    1020             :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
    1021             : 
    1022           8 :       IF (PRESENT(name)) name = matrix%name
    1023     5410849 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
    1024     5410849 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
    1025             : 
    1026             :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
    1027             :                             ncol_local=ncol_local, nrow_global=nrow_global, &
    1028             :                             ncol_global=ncol_global, nrow_block=nrow_block, &
    1029             :                             ncol_block=ncol_block, row_indices=row_indices, &
    1030             :                             col_indices=col_indices, nrow_locals=nrow_locals, &
    1031     5410849 :                             ncol_locals=ncol_locals, context=context, para_env=para_env)
    1032             : 
    1033     5410849 :    END SUBROUTINE cp_fm_get_info
    1034             : 
    1035             : ! **************************************************************************************************
    1036             : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
    1037             : !> \param matrix a cp_fm_type instance
    1038             : !> \param io_unit the I/O unit to use for writing
    1039             : ! **************************************************************************************************
    1040           3 :    SUBROUTINE cp_fm_write_info(matrix, io_unit)
    1041             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1042             :       INTEGER, INTENT(IN)                                :: io_unit
    1043             : 
    1044           3 :       WRITE (io_unit, '(/,A,A12)') "CP_FM | Name:                           ", matrix%name
    1045           3 :       CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
    1046           3 :    END SUBROUTINE cp_fm_write_info
    1047             : 
    1048             : ! **************************************************************************************************
    1049             : !> \brief find the maximum absolute value of the matrix element
    1050             : !>      maxval(abs(matrix))
    1051             : !> \param matrix ...
    1052             : !> \param a_max ...
    1053             : !> \param ir_max ...
    1054             : !> \param ic_max ...
    1055             : ! **************************************************************************************************
    1056       91623 :    SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
    1057             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1058             :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1059             :       INTEGER, INTENT(OUT), OPTIONAL                     :: ir_max, ic_max
    1060             : 
    1061             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_maxabsval'
    1062             : 
    1063             :       INTEGER                                            :: handle, i, ic_max_local, ir_max_local, &
    1064             :                                                             j, mepos, ncol_local, nrow_local, &
    1065             :                                                             num_pe
    1066       91623 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ic_max_vec, ir_max_vec
    1067       91623 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1068             :       REAL(dp)                                           :: my_max
    1069       91623 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_max_vec
    1070       91623 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1071       91623 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: my_block_sp
    1072             : 
    1073       91623 :       CALL timeset(routineN, handle)
    1074             : 
    1075       91623 :       my_block => matrix%local_data
    1076       91623 :       my_block_sp => matrix%local_data_sp
    1077             : 
    1078             :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
    1079       91623 :                           row_indices=row_indices, col_indices=col_indices)
    1080             : 
    1081       91623 :       IF (matrix%use_sp) THEN
    1082           0 :          a_max = REAL(MAXVAL(ABS(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
    1083             :       ELSE
    1084    54588539 :          a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
    1085             :       END IF
    1086             : 
    1087       91623 :       IF (PRESENT(ir_max)) THEN
    1088           0 :          num_pe = matrix%matrix_struct%para_env%num_pe
    1089           0 :          mepos = matrix%matrix_struct%para_env%mepos
    1090           0 :          ALLOCATE (ir_max_vec(0:num_pe - 1))
    1091           0 :          ir_max_vec(0:num_pe - 1) = 0
    1092           0 :          ALLOCATE (ic_max_vec(0:num_pe - 1))
    1093           0 :          ic_max_vec(0:num_pe - 1) = 0
    1094           0 :          ALLOCATE (a_max_vec(0:num_pe - 1))
    1095           0 :          a_max_vec(0:num_pe - 1) = 0.0_dp
    1096           0 :          my_max = 0.0_dp
    1097             : 
    1098           0 :          IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
    1099           0 :             DO i = 1, ncol_local
    1100           0 :                DO j = 1, nrow_local
    1101           0 :                   IF (matrix%use_sp) THEN
    1102           0 :                      IF (ABS(my_block_sp(j, i)) > my_max) THEN
    1103           0 :                         my_max = REAL(my_block_sp(j, i), dp)
    1104           0 :                         ir_max_local = j
    1105           0 :                         ic_max_local = i
    1106             :                      END IF
    1107             :                   ELSE
    1108           0 :                      IF (ABS(my_block(j, i)) > my_max) THEN
    1109           0 :                         my_max = my_block(j, i)
    1110           0 :                         ir_max_local = j
    1111           0 :                         ic_max_local = i
    1112             :                      END IF
    1113             :                   END IF
    1114             :                END DO
    1115             :             END DO
    1116             : 
    1117           0 :             a_max_vec(mepos) = my_max
    1118           0 :             ir_max_vec(mepos) = row_indices(ir_max_local)
    1119           0 :             ic_max_vec(mepos) = col_indices(ic_max_local)
    1120             : 
    1121             :          END IF
    1122             : 
    1123           0 :          CALL matrix%matrix_struct%para_env%sum(a_max_vec)
    1124           0 :          CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
    1125           0 :          CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
    1126             : 
    1127           0 :          my_max = 0.0_dp
    1128           0 :          DO i = 0, num_pe - 1
    1129           0 :             IF (a_max_vec(i) > my_max) THEN
    1130           0 :                ir_max = ir_max_vec(i)
    1131           0 :                ic_max = ic_max_vec(i)
    1132             :             END IF
    1133             :          END DO
    1134             : 
    1135           0 :          DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
    1136           0 :          CPASSERT(ic_max > 0)
    1137           0 :          CPASSERT(ir_max > 0)
    1138             : 
    1139             :       END IF
    1140             : 
    1141       91623 :       CALL matrix%matrix_struct%para_env%max(a_max)
    1142             : 
    1143       91623 :       CALL timestop(handle)
    1144             : 
    1145      183246 :    END SUBROUTINE cp_fm_maxabsval
    1146             : 
    1147             : ! **************************************************************************************************
    1148             : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
    1149             : !>      = || A ||_infinity
    1150             : !> \param matrix ...
    1151             : !> \param a_max ...
    1152             : !> \note
    1153             : !>      for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
    1154             : !>      Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
    1155             : !>      http://mathworld.wolfram.com/MatrixNorm.html
    1156             : !>      (but the bound is not so tight in the general case)
    1157             : ! **************************************************************************************************
    1158        4244 :    SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
    1159             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1160             :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1161             : 
    1162             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
    1163             : 
    1164             :       INTEGER                                            :: handle, i, j, ncol_local, nrow_global, &
    1165             :                                                             nrow_local
    1166        4244 :       INTEGER, DIMENSION(:), POINTER                     :: row_indices
    1167             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: values
    1168        4244 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1169             : 
    1170        4244 :       CALL timeset(routineN, handle)
    1171             : 
    1172        4244 :       my_block => matrix%local_data
    1173             : 
    1174        4244 :       CPASSERT(.NOT. matrix%use_sp)
    1175             : 
    1176             :       CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
    1177        4244 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1178             : 
    1179             :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1180       12732 :       ALLOCATE (values(nrow_global))
    1181       62104 :       values = 0.0_dp
    1182       62104 :       DO j = 1, ncol_local
    1183      525404 :          DO i = 1, nrow_local
    1184      521160 :             values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
    1185             :          END DO
    1186             :       END DO
    1187        4244 :       CALL matrix%matrix_struct%para_env%sum(values)
    1188       66348 :       a_max = MAXVAL(values)
    1189        4244 :       DEALLOCATE (values)
    1190             : 
    1191        4244 :       CALL timestop(handle)
    1192        8488 :    END SUBROUTINE
    1193             : 
    1194             : ! **************************************************************************************************
    1195             : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
    1196             : !> \param matrix ...
    1197             : !> \param norm_array ...
    1198             : ! **************************************************************************************************
    1199        1282 :    SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
    1200             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1201             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: norm_array
    1202             : 
    1203             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorsnorm'
    1204             : 
    1205             :       INTEGER                                            :: handle, i, j, ncol_global, ncol_local, &
    1206             :                                                             nrow_local
    1207        1282 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices
    1208        1282 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1209             : 
    1210        1282 :       CALL timeset(routineN, handle)
    1211             : 
    1212        1282 :       my_block => matrix%local_data
    1213             : 
    1214        1282 :       CPASSERT(.NOT. matrix%use_sp)
    1215             : 
    1216             :       CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
    1217        1282 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1218             : 
    1219             :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1220       29746 :       norm_array = 0.0_dp
    1221       29746 :       DO j = 1, ncol_local
    1222     1261016 :          DO i = 1, nrow_local
    1223     1259734 :             norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
    1224             :          END DO
    1225             :       END DO
    1226       58210 :       CALL matrix%matrix_struct%para_env%sum(norm_array)
    1227       29746 :       norm_array = SQRT(norm_array)
    1228             : 
    1229        1282 :       CALL timestop(handle)
    1230        1282 :    END SUBROUTINE
    1231             : 
    1232             : ! **************************************************************************************************
    1233             : !> \brief summing up all the elements along the matrix's i-th index
    1234             : !>        \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
    1235             : !>        or
    1236             : !>        \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
    1237             : !> \param matrix     an input matrix A
    1238             : !> \param sum_array  sums of elements in each column/row
    1239             : !> \param dir ...
    1240             : !> \note  forked from cp_fm_vectorsnorm() to be used with
    1241             : !>        the maximum overlap method
    1242             : !>        added row variation
    1243             : ! **************************************************************************************************
    1244        7194 :    SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
    1245             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1246             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: sum_array
    1247             :       CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: dir
    1248             : 
    1249             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorssum'
    1250             : 
    1251             :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
    1252        7194 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1253             :       LOGICAL                                            :: docol
    1254        7194 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1255             : 
    1256        7194 :       CALL timeset(routineN, handle)
    1257             : 
    1258        7194 :       IF (PRESENT(dir)) THEN
    1259        7154 :          IF (dir == 'c' .OR. dir == 'C') THEN
    1260             :             docol = .TRUE.
    1261             :          ELSEIF (dir == 'r' .OR. dir == 'R') THEN
    1262             :             docol = .FALSE.
    1263             :          ELSE
    1264           0 :             CPABORT('Wrong argument DIR')
    1265             :          END IF
    1266             :       ELSE
    1267             :          docol = .TRUE.
    1268             :       END IF
    1269             : 
    1270        7194 :       my_block => matrix%local_data
    1271             : 
    1272        7194 :       CPASSERT(.NOT. matrix%use_sp)
    1273             : 
    1274             :       CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
    1275        7194 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1276             : 
    1277             :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1278      221150 :       sum_array(:) = 0.0_dp
    1279        7194 :       IF (docol) THEN
    1280         448 :       DO j = 1, ncol_local
    1281        3628 :          DO i = 1, nrow_local
    1282        3588 :             sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
    1283             :          END DO
    1284             :       END DO
    1285             :       ELSE
    1286       82094 :       DO j = 1, ncol_local
    1287     6429458 :          DO i = 1, nrow_local
    1288     6422304 :             sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
    1289             :          END DO
    1290             :       END DO
    1291             :       END IF
    1292      435106 :       CALL matrix%matrix_struct%para_env%sum(sum_array)
    1293             : 
    1294        7194 :       CALL timestop(handle)
    1295        7194 :    END SUBROUTINE
    1296             : 
    1297             : ! **************************************************************************************************
    1298             : !> \brief copy one identically sized matrix in the other
    1299             : !> \param source ...
    1300             : !> \param destination ...
    1301             : !> \note
    1302             : !>      see also cp_fm_to_fm_columns
    1303             : ! **************************************************************************************************
    1304      358894 :    SUBROUTINE cp_fm_to_fm_matrix(source, destination)
    1305             : 
    1306             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1307             : 
    1308             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
    1309             : 
    1310             :       INTEGER                                            :: handle, npcol, nprow
    1311             : 
    1312      358894 :       CALL timeset(routineN, handle)
    1313             : 
    1314      358894 :       nprow = source%matrix_struct%context%num_pe(1)
    1315      358894 :       npcol = source%matrix_struct%context%num_pe(2)
    1316             : 
    1317      358894 :       IF ((.NOT. cp2k_is_parallel) .OR. &
    1318             :           cp_fm_struct_equivalent(source%matrix_struct, &
    1319             :                                   destination%matrix_struct)) THEN
    1320      358894 :          IF (source%use_sp .AND. destination%use_sp) THEN
    1321           0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1322             :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
    1323             :                CALL cp_abort(__LOCATION__, &
    1324             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1325             :                              "> to full matrix <"//TRIM(destination%name)// &
    1326           0 :                              ">. The local_data blocks have different sizes.")
    1327             :             CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
    1328           0 :                        source%local_data_sp, 1, destination%local_data_sp, 1)
    1329      358894 :          ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
    1330           0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
    1331             :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
    1332             :                CALL cp_abort(__LOCATION__, &
    1333             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1334             :                              "> to full matrix <"//TRIM(destination%name)// &
    1335           0 :                              ">. The local_data blocks have different sizes.")
    1336           0 :             destination%local_data = REAL(source%local_data_sp, dp)
    1337      358894 :          ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
    1338           0 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1339             :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
    1340             :                CALL cp_abort(__LOCATION__, &
    1341             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1342             :                              "> to full matrix <"//TRIM(destination%name)// &
    1343           0 :                              ">. The local_data blocks have different sizes.")
    1344           0 :             destination%local_data_sp = REAL(source%local_data, sp)
    1345             :          ELSE
    1346      358894 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
    1347             :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
    1348             :                CALL cp_abort(__LOCATION__, &
    1349             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1350             :                              "> to full matrix <"//TRIM(destination%name)// &
    1351           0 :                              ">. The local_data blocks have different sizes.")
    1352             :             CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
    1353      358894 :                        source%local_data, 1, destination%local_data, 1)
    1354             :          END IF
    1355             :       ELSE
    1356           0 :          CPABORT("Data structures of source and target full matrix are not equivalent")
    1357             :       END IF
    1358             : 
    1359      358894 :       CALL timestop(handle)
    1360             : 
    1361      358894 :    END SUBROUTINE cp_fm_to_fm_matrix
    1362             : 
    1363             : ! **************************************************************************************************
    1364             : !> \brief copy just a subset of columns of a fm to a fm
    1365             : !> \param msource ...
    1366             : !> \param mtarget ...
    1367             : !> \param ncol ...
    1368             : !> \param source_start ...
    1369             : !> \param target_start ...
    1370             : ! **************************************************************************************************
    1371      163594 :    SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
    1372             :                                   target_start)
    1373             : 
    1374             :       TYPE(cp_fm_type), INTENT(IN)          :: msource, mtarget
    1375             :       INTEGER, INTENT(IN)                      :: ncol
    1376             :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
    1377             : 
    1378             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
    1379             : 
    1380             :       INTEGER                                  :: handle, n, ss, ts
    1381             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1382             : #if defined(__parallel)
    1383             :       INTEGER                                  :: i
    1384             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1385             : #endif
    1386             : 
    1387      163594 :       CALL timeset(routineN, handle)
    1388             : 
    1389      163594 :       ss = 1
    1390      163594 :       ts = 1
    1391             : 
    1392      163594 :       IF (PRESENT(source_start)) ss = source_start
    1393      163594 :       IF (PRESENT(target_start)) ts = target_start
    1394             : 
    1395      163594 :       n = msource%matrix_struct%nrow_global
    1396             : 
    1397      163594 :       a => msource%local_data
    1398      163594 :       b => mtarget%local_data
    1399             : 
    1400             : #if defined(__parallel)
    1401     1635940 :       desca(:) = msource%matrix_struct%descriptor(:)
    1402     1635940 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1403      655308 :       DO i = 0, ncol - 1
    1404      655308 :          CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
    1405             :       END DO
    1406             : #else
    1407             :       IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
    1408             :          CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
    1409             :       END IF
    1410             : #endif
    1411             : 
    1412      163594 :       CALL timestop(handle)
    1413             : 
    1414      163594 :    END SUBROUTINE cp_fm_to_fm_columns
    1415             : 
    1416             : ! **************************************************************************************************
    1417             : !> \brief copy just a triangular matrix
    1418             : !> \param msource ...
    1419             : !> \param mtarget ...
    1420             : !> \param uplo ...
    1421             : ! **************************************************************************************************
    1422         370 :    SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
    1423             : 
    1424             :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1425             :       CHARACTER(LEN=1), OPTIONAL, INTENT(IN)   :: uplo
    1426             : 
    1427             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
    1428             : 
    1429             :       CHARACTER(LEN=1)                         :: myuplo
    1430             :       INTEGER                                  :: handle, ncol, nrow
    1431         370 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1432             : #if defined(__parallel)
    1433             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1434             : #endif
    1435             : 
    1436         370 :       CALL timeset(routineN, handle)
    1437             : 
    1438         370 :       myuplo = 'U'
    1439         370 :       IF (PRESENT(uplo)) myuplo = uplo
    1440             : 
    1441         370 :       nrow = msource%matrix_struct%nrow_global
    1442         370 :       ncol = msource%matrix_struct%ncol_global
    1443             : 
    1444         370 :       a => msource%local_data
    1445         370 :       b => mtarget%local_data
    1446             : 
    1447             : #if defined(__parallel)
    1448        3700 :       desca(:) = msource%matrix_struct%descriptor(:)
    1449        3700 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1450         370 :       CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
    1451             : #else
    1452             :       CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
    1453             : #endif
    1454             : 
    1455         370 :       CALL timestop(handle)
    1456             : 
    1457         370 :    END SUBROUTINE cp_fm_to_fm_triangular
    1458             : 
    1459             : ! **************************************************************************************************
    1460             : !> \brief copy just a part ot the matrix
    1461             : !> \param msource ...
    1462             : !> \param mtarget ...
    1463             : !> \param nrow ...
    1464             : !> \param ncol ...
    1465             : !> \param s_firstrow ...
    1466             : !> \param s_firstcol ...
    1467             : !> \param t_firstrow ...
    1468             : !> \param t_firstcol ...
    1469             : ! **************************************************************************************************
    1470             : 
    1471       11152 :    SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
    1472             : 
    1473             :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1474             :       INTEGER, INTENT(IN)                      :: nrow, ncol, s_firstrow, &
    1475             :                                                   s_firstcol, t_firstrow, &
    1476             :                                                   t_firstcol
    1477             : 
    1478             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
    1479             : 
    1480             :       INTEGER                                  :: handle, i, na, nb, ss, ts
    1481             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1482             : #if defined(__parallel)
    1483             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1484             : #endif
    1485             : 
    1486       11152 :       CALL timeset(routineN, handle)
    1487             : 
    1488       11152 :       a => msource%local_data
    1489       11152 :       b => mtarget%local_data
    1490             : 
    1491       11152 :       na = msource%matrix_struct%nrow_global
    1492       11152 :       nb = mtarget%matrix_struct%nrow_global
    1493             : !    nrow must be <= na and nb
    1494       11152 :       IF (nrow > na) &
    1495           0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1496       11152 :       IF (nrow > nb) &
    1497           0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1498       11152 :       na = msource%matrix_struct%ncol_global
    1499       11152 :       nb = mtarget%matrix_struct%ncol_global
    1500             : !    ncol must be <= na_col and nb_col
    1501       11152 :       IF (ncol > na) &
    1502           0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1503       11152 :       IF (ncol > nb) &
    1504           0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1505             : 
    1506             : #if defined(__parallel)
    1507      111520 :       desca(:) = msource%matrix_struct%descriptor(:)
    1508      111520 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1509      126112 :       DO i = 0, ncol - 1
    1510      114960 :          ss = s_firstcol + i
    1511      114960 :          ts = t_firstcol + i
    1512      126112 :          CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
    1513             :       END DO
    1514             : #else
    1515             :       DO i = 0, ncol - 1
    1516             :          ss = s_firstcol + i
    1517             :          ts = t_firstcol + i
    1518             :          CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
    1519             :       END DO
    1520             : #endif
    1521             : 
    1522       11152 :       CALL timestop(handle)
    1523       11152 :    END SUBROUTINE cp_fm_to_fm_submat
    1524             : 
    1525             : ! **************************************************************************************************
    1526             : !> \brief General copy of a fm matrix to another fm matrix.
    1527             : !>        Uses non-blocking MPI rather than ScaLAPACK.
    1528             : !>
    1529             : !> \param source          input fm matrix
    1530             : !> \param destination     output fm matrix
    1531             : !> \param para_env        parallel environment corresponding to the BLACS env that covers all parts
    1532             : !>                        of the input and output matrices
    1533             : !> \par History
    1534             : !>      31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
    1535             : ! **************************************************************************************************
    1536        8806 :    SUBROUTINE cp_fm_copy_general(source, destination, para_env)
    1537             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1538             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1539             : 
    1540             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
    1541             : 
    1542             :       INTEGER                                            :: handle
    1543       79254 :       TYPE(copy_info_type)                               :: info
    1544             : 
    1545        8806 :       CALL timeset(routineN, handle)
    1546             : 
    1547        8806 :       CALL cp_fm_start_copy_general(source, destination, para_env, info)
    1548        8806 :       IF (ASSOCIATED(destination%matrix_struct)) THEN
    1549        8794 :          CALL cp_fm_finish_copy_general(destination, info)
    1550             :       END IF
    1551        8806 :       IF (ASSOCIATED(source%matrix_struct)) THEN
    1552        8654 :          CALL cp_fm_cleanup_copy_general(info)
    1553             :       END IF
    1554             : 
    1555        8806 :       CALL timestop(handle)
    1556        8806 :    END SUBROUTINE cp_fm_copy_general
    1557             : 
    1558             : ! **************************************************************************************************
    1559             : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
    1560             : !> \param source input fm matrix
    1561             : !> \param destination output fm matrix
    1562             : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
    1563             : !>                        of the input and output matrices
    1564             : !> \param info all of the data that will be needed to complete the copy operation
    1565             : ! **************************************************************************************************
    1566     1594420 :    SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
    1567             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1568             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1569             :       TYPE(copy_info_type), INTENT(OUT)                  :: info
    1570             : 
    1571             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
    1572             : 
    1573             :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
    1574             :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
    1575             :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
    1576             :          recv_rank, recv_size, send_rank, send_size
    1577      159442 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
    1578      318884 :                                                             recv_count, send_count, send_disp, &
    1579      159442 :                                                             source2global, src_p, src_q
    1580      159442 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
    1581             :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
    1582             :                                                             src_block, src_block_tmp, src_num_pe
    1583      318884 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
    1584      318884 :                                                             send_col_indices, send_row_indices
    1585             :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
    1586     2232188 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
    1587             : 
    1588      159442 :       CALL timeset(routineN, handle)
    1589             : 
    1590             :       IF (.NOT. cp2k_is_parallel) THEN
    1591             :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
    1592             :          nrow_local_send = SIZE(source%local_data, 1)
    1593             :          ncol_local_send = SIZE(source%local_data, 2)
    1594             :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
    1595             :          k = 0
    1596             :          DO j = 1, ncol_local_send
    1597             :             DO i = 1, nrow_local_send
    1598             :                k = k + 1
    1599             :                info%send_buf(k) = source%local_data(i, j)
    1600             :             END DO
    1601             :          END DO
    1602             :       ELSE
    1603             :          ! assumption of double precision data is carried forward from ScaLAPACK version
    1604      159442 :          IF (source%use_sp) CPABORT("only DP kind implemented")
    1605      159442 :          IF (destination%use_sp) CPABORT("only DP kind implemented")
    1606             : 
    1607      159442 :          NULLIFY (recv_dist, send_dist)
    1608      159442 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
    1609             : 
    1610             :          ! The 'global' communicator contains both the source and destination decompositions
    1611      159442 :          global_size = para_env%num_pe
    1612      159442 :          global_rank = para_env%mepos
    1613             : 
    1614             :          ! The source/send decomposition and destination/recv decompositions may only exist on
    1615             :          ! on a subset of the processes involved in the communication
    1616             :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
    1617             :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
    1618      159442 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1619      127470 :             recv_dist => destination%matrix_struct
    1620      127470 :             recv_rank = recv_dist%para_env%mepos
    1621             :          ELSE
    1622       31972 :             recv_rank = mp_proc_null
    1623             :          END IF
    1624             : 
    1625      159442 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1626      140922 :             send_dist => source%matrix_struct
    1627      140922 :             send_rank = send_dist%para_env%mepos
    1628             :          ELSE
    1629       18520 :             send_rank = mp_proc_null
    1630             :          END IF
    1631             : 
    1632             :          ! Map the rank in the source/dest communicator to the global rank
    1633      478326 :          ALLOCATE (all_ranks(0:global_size - 1))
    1634             : 
    1635      159442 :          CALL para_env%allgather(send_rank, all_ranks)
    1636      159442 :          IF (ASSOCIATED(recv_dist)) THEN
    1637      637350 :             ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
    1638      382410 :             DO i = 0, global_size - 1
    1639      382410 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
    1640      217900 :                   source2global(all_ranks(i)) = i
    1641             :                END IF
    1642             :             END DO
    1643             :          END IF
    1644             : 
    1645      159442 :          CALL para_env%allgather(recv_rank, all_ranks)
    1646      159442 :          IF (ASSOCIATED(send_dist)) THEN
    1647      704610 :             ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
    1648      422766 :             DO i = 0, global_size - 1
    1649      422766 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
    1650      217900 :                   dest2global(all_ranks(i)) = i
    1651             :                END IF
    1652             :             END DO
    1653             :          END IF
    1654      159442 :          DEALLOCATE (all_ranks)
    1655             : 
    1656             :          ! Some data from the two decompositions will be needed by all processes in the global group :
    1657             :          ! process grid shape, block size, and the BLACS-to-MPI mapping
    1658             : 
    1659             :          ! The global root process will receive the data (from the root process in each decomposition)
    1660     1116094 :          send_req(:) = mp_request_null
    1661      159442 :          IF (global_rank == 0) THEN
    1662      558047 :             recv_req(:) = mp_request_null
    1663       79721 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
    1664       79721 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
    1665       79721 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
    1666       79721 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
    1667             :          END IF
    1668             : 
    1669      159442 :          IF (ASSOCIATED(send_dist)) THEN
    1670      140922 :             IF ((send_rank .EQ. 0)) THEN
    1671             :                ! need to use separate buffers here in case this is actually global rank 0
    1672      239163 :                src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
    1673       79721 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
    1674       79721 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
    1675             :             END IF
    1676             :          END IF
    1677             : 
    1678      159442 :          IF (ASSOCIATED(recv_dist)) THEN
    1679      127470 :             IF ((recv_rank .EQ. 0)) THEN
    1680      239163 :                dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
    1681       79721 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
    1682       79721 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
    1683             :             END IF
    1684             :          END IF
    1685             : 
    1686      159442 :          IF (global_rank == 0) THEN
    1687       79721 :             CALL mp_waitall(recv_req(1:4))
    1688             :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
    1689           0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1690             :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1691      558047 :                       )
    1692       79721 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
    1693       79721 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
    1694             :          END IF
    1695             : 
    1696      159442 :          IF (ASSOCIATED(send_dist)) THEN
    1697      140922 :             IF ((send_rank .EQ. 0)) THEN
    1698       79721 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1699             :             END IF
    1700             :          END IF
    1701             : 
    1702      159442 :          IF (ASSOCIATED(recv_dist)) THEN
    1703      127470 :             IF ((recv_rank .EQ. 0)) THEN
    1704       79721 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1705             :             END IF
    1706             :          END IF
    1707             : 
    1708      159442 :          IF (global_rank == 0) THEN
    1709       79721 :             CALL mp_waitall(recv_req(5:6))
    1710             :          END IF
    1711             : 
    1712             :          ! Finally, broadcast the data to all processes in the global communicator
    1713      159442 :          CALL para_env%bcast(src_block, 0)
    1714      159442 :          CALL para_env%bcast(dest_block, 0)
    1715      159442 :          CALL para_env%bcast(src_num_pe, 0)
    1716      159442 :          CALL para_env%bcast(dest_num_pe, 0)
    1717      478326 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1718      478326 :          info%nblock_src(1:2) = src_block(1:2)
    1719      159442 :          IF (global_rank .NE. 0) THEN
    1720           0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1721           0 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1722      558047 :                       )
    1723             :          END IF
    1724      159442 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1725      159442 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1726             : 
    1727      159442 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1728      159442 :          send_size = src_num_pe(1)*src_num_pe(2)
    1729      159442 :          info%send_size = send_size
    1730      159442 :          CALL mp_waitall(send_req(:))
    1731             : 
    1732             :          ! Setup is now complete, we can start the actual communication here.
    1733             :          ! The order implemented here is:
    1734             :          !  DEST_1
    1735             :          !      compute recv sizes
    1736             :          !      call irecv
    1737             :          !  SRC_1
    1738             :          !      compute send sizes
    1739             :          !      pack send buffers
    1740             :          !      call isend
    1741             :          !  DEST_2
    1742             :          !      wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
    1743             :          !  SRC_2
    1744             :          !      wait for the sends
    1745             : 
    1746             :          ! DEST_1
    1747      159442 :          IF (ASSOCIATED(recv_dist)) THEN
    1748             :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1749             :                                   col_indices=recv_col_indices &
    1750      127470 :                                   )
    1751      127470 :             info%recv_col_indices => recv_col_indices
    1752      127470 :             info%recv_row_indices => recv_row_indices
    1753      127470 :             nrow_block_src = src_block(1)
    1754      127470 :             ncol_block_src = src_block(2)
    1755      855250 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1756             : 
    1757             :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1758      127470 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1759      127470 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1760      127470 :             info%nlocal_recv(1) = nrow_local_recv
    1761      127470 :             info%nlocal_recv(2) = ncol_local_recv
    1762             :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1763      637350 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1764     2422161 :             DO i = 1, nrow_local_recv
    1765             :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1766             :                ! then work out which row block it comes from, and which process row that row block comes from.
    1767     2422161 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1768             :             END DO
    1769     4062996 :             DO j = 1, ncol_local_recv
    1770             :                ! Similarly for the columns
    1771     4062996 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1772             :             END DO
    1773             :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1774             : 
    1775      254940 :             DO q = 0, src_num_pe(2) - 1
    1776     4062996 :                ncols = COUNT(src_q .EQ. q)
    1777      472840 :                DO p = 0, src_num_pe(1) - 1
    1778     4430466 :                   nrows = COUNT(src_p .EQ. p)
    1779             :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1780      345370 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1781             :                END DO
    1782             :             END DO
    1783      127470 :             DEALLOCATE (src_p, src_q)
    1784             : 
    1785             :             ! Use one long buffer (and displacements into that buffer)
    1786             :             !     this prevents the need for a rectangular array where not all elements will be populated
    1787      600310 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1788      127470 :             info%recv_disp(0) = 0
    1789      217900 :             DO i = 1, send_size - 1
    1790      217900 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1791             :             END DO
    1792             : 
    1793             :             ! Issue receive calls on ranks which expect data
    1794      345370 :             DO k = 0, send_size - 1
    1795      345370 :                IF (recv_count(k) .GT. 0) THEN
    1796             :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1797      159886 :                                       source2global(k), info%recv_request(k))
    1798             :                END IF
    1799             :             END DO
    1800      127470 :             DEALLOCATE (source2global)
    1801             :          END IF ! ASSOCIATED(recv_dist)
    1802             : 
    1803             :          ! SRC_1
    1804      159442 :          IF (ASSOCIATED(send_dist)) THEN
    1805             :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1806             :                                   col_indices=send_col_indices &
    1807      140922 :                                   )
    1808      140922 :             nrow_block_dest = dest_block(1)
    1809      140922 :             ncol_block_dest = dest_block(2)
    1810      922510 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1811             : 
    1812             :             ! Determine the send counts, allocate the send buffers
    1813      140922 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1814      140922 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1815             : 
    1816             :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1817             :             !   i.e. number of rows,cols in the sending distribution
    1818      704610 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1819             : 
    1820     2435613 :             DO i = 1, nrow_local_send
    1821             :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1822     2435613 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1823             :             END DO
    1824     4347180 :             DO j = 1, ncol_local_send
    1825     4347180 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1826             :             END DO
    1827             :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1828             : 
    1829      281844 :             DO q = 0, dest_num_pe(2) - 1
    1830     4347180 :                ncols = COUNT(dest_q .EQ. q)
    1831      499744 :                DO p = 0, dest_num_pe(1) - 1
    1832     4160238 :                   nrows = COUNT(dest_p .EQ. p)
    1833      358822 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1834             :                END DO
    1835             :             END DO
    1836      140922 :             DEALLOCATE (dest_p, dest_q)
    1837             : 
    1838             :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1839      640666 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1840      140922 :             send_disp(0) = 0
    1841      217900 :             DO k = 1, recv_size - 1
    1842      217900 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1843             :             END DO
    1844             : 
    1845             :             ! Loop over the smat, pack the send buffers
    1846      358822 :             send_count(:) = 0
    1847     4347180 :             DO j = 1, ncol_local_send
    1848             :                ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
    1849     4206258 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1850   172252494 :                DO i = 1, nrow_local_send
    1851   167905314 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1852   167905314 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1853   167905314 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1854   172111572 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1855             :                END DO
    1856             :             END DO
    1857             : 
    1858             :             ! For each non-zero send_count, call mpi_isend
    1859      358822 :             DO k = 0, recv_size - 1
    1860      358822 :                IF (send_count(k) .GT. 0) THEN
    1861             :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1862      159886 :                                       dest2global(k), info%send_request(k))
    1863             :                END IF
    1864             :             END DO
    1865      140922 :             DEALLOCATE (send_count, send_disp, dest2global)
    1866             :          END IF ! ASSOCIATED(send_dist)
    1867      159442 :          DEALLOCATE (dest_blacs2mpi)
    1868             : 
    1869             :       END IF !IF (.NOT. cp2k_is_parallel)
    1870             : 
    1871      159442 :       CALL timestop(handle)
    1872             : 
    1873      637768 :    END SUBROUTINE cp_fm_start_copy_general
    1874             : 
    1875             : ! **************************************************************************************************
    1876             : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
    1877             : !> \param destination output fm matrix
    1878             : !> \param info all of the data that will be needed to complete the copy operation
    1879             : ! **************************************************************************************************
    1880      127470 :    SUBROUTINE cp_fm_finish_copy_general(destination, info)
    1881             :       TYPE(cp_fm_type), INTENT(IN)                       :: destination
    1882             :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1883             : 
    1884             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
    1885             : 
    1886             :       INTEGER                                            :: handle, i, j, k, mpi_rank, send_size, &
    1887             :                                                             src_p_i, src_q_j
    1888      127470 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count
    1889             :       INTEGER, DIMENSION(2)                              :: nblock_src, nlocal_recv, src_num_pe
    1890      127470 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1891             : 
    1892      127470 :       CALL timeset(routineN, handle)
    1893             : 
    1894             :       IF (.NOT. cp2k_is_parallel) THEN
    1895             :          ! Now unpack the data from the 'send buffer'
    1896             :          k = 0
    1897             :          DO j = 1, SIZE(destination%local_data, 2)
    1898             :             DO i = 1, SIZE(destination%local_data, 1)
    1899             :                k = k + 1
    1900             :                destination%local_data(i, j) = info%send_buf(k)
    1901             :             END DO
    1902             :          END DO
    1903             :          DEALLOCATE (info%send_buf)
    1904             :       ELSE
    1905             :          ! Set up local variables ...
    1906      127470 :          send_size = info%send_size
    1907      382410 :          nlocal_recv(1:2) = info%nlocal_recv(:)
    1908      382410 :          nblock_src(1:2) = info%nblock_src(:)
    1909      382410 :          src_num_pe(1:2) = info%src_num_pe(:)
    1910      127470 :          recv_col_indices => info%recv_col_indices
    1911      127470 :          recv_row_indices => info%recv_row_indices
    1912             : 
    1913             :          ! ... use the local variables to do the work
    1914             :          ! DEST_2
    1915      127470 :          CALL mp_waitall(info%recv_request(:))
    1916      382410 :          ALLOCATE (recv_count(0:send_size - 1))
    1917             :          ! Loop over the rmat, filling it in with data from the recv buffers
    1918             :          ! (here the block sizes, num_pes refer to the distribution of the source matrix)
    1919      345370 :          recv_count(:) = 0
    1920     4062996 :          DO j = 1, nlocal_recv(2)
    1921     3935526 :             src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
    1922   171968310 :             DO i = 1, nlocal_recv(1)
    1923   167905314 :                src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
    1924   167905314 :                mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
    1925   167905314 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    1926   171840840 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    1927             :             END DO
    1928             :          END DO
    1929      127470 :          DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    1930             :          ! Invalidate the stored state
    1931             :          NULLIFY (info%recv_col_indices, &
    1932      127470 :                   info%recv_row_indices)
    1933             : 
    1934             :       END IF
    1935             : 
    1936      127470 :       CALL timestop(handle)
    1937             : 
    1938      127470 :    END SUBROUTINE cp_fm_finish_copy_general
    1939             : 
    1940             : ! **************************************************************************************************
    1941             : !> \brief Completes the copy operation: wait for comms clean up MPI state
    1942             : !> \param info all of the data that will be needed to complete the copy operation
    1943             : ! **************************************************************************************************
    1944      140258 :    SUBROUTINE cp_fm_cleanup_copy_general(info)
    1945             :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1946             : 
    1947             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
    1948             : 
    1949             :       INTEGER                                            :: handle
    1950             : 
    1951      140258 :       CALL timeset(routineN, handle)
    1952             : 
    1953             :       IF (.NOT. cp2k_is_parallel) THEN
    1954             :          ! Don't do anything - no MPI state for the serial case
    1955             :       ELSE
    1956             :          ! SRC_2
    1957             :          ! If this process is also in the destination decomposition, this deallocate
    1958             :          ! Was already done in cp_fm_finish_copy_general
    1959      140258 :          IF (ALLOCATED(info%src_blacs2mpi)) THEN
    1960       31308 :             DEALLOCATE (info%src_blacs2mpi)
    1961             :          END IF
    1962      140258 :          CALL mp_waitall(info%send_request)
    1963      140258 :          DEALLOCATE (info%send_request, info%send_buf)
    1964             : 
    1965             :       END IF
    1966             : 
    1967      140258 :       CALL timestop(handle)
    1968             : 
    1969      140258 :    END SUBROUTINE cp_fm_cleanup_copy_general
    1970             : 
    1971             : ! **************************************************************************************************
    1972             : !> \brief General copy of a submatrix of fm matrix to  a submatrix of another fm matrix.
    1973             : !>        The two matrices can have different contexts.
    1974             : !>
    1975             : !>        Summary of distribution routines for dense matrices
    1976             : !>        The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
    1977             : !>
    1978             : !>        call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
    1979             : !>
    1980             : !>        A process that is not a part of the context of A should set descA(2)
    1981             : !>        to -1, and similarly for B.
    1982             : !>
    1983             : !> \param source          input fm matrix
    1984             : !> \param destination     output fm matrix
    1985             : !> \param nrows           number of rows of sub matrix to be copied
    1986             : !> \param ncols           number of cols of sub matrix to be copied
    1987             : !> \param s_firstrow      starting global row index of sub matrix in source
    1988             : !> \param s_firstcol      starting global col index of sub matrix in source
    1989             : !> \param d_firstrow      starting global row index of sub matrix in destination
    1990             : !> \param d_firstcol      starting global col index of sub matrix in destination
    1991             : !> \param global_context  process grid that covers all parts of either A or B.
    1992             : ! **************************************************************************************************
    1993       16506 :    SUBROUTINE cp_fm_to_fm_submat_general(source, &
    1994             :                                          destination, &
    1995             :                                          nrows, &
    1996             :                                          ncols, &
    1997             :                                          s_firstrow, &
    1998             :                                          s_firstcol, &
    1999             :                                          d_firstrow, &
    2000             :                                          d_firstcol, &
    2001             :                                          global_context)
    2002             : 
    2003             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    2004             :       INTEGER, INTENT(IN)                                :: nrows, ncols, s_firstrow, s_firstcol, &
    2005             :                                                             d_firstrow, d_firstcol
    2006             : 
    2007             :       CLASS(cp_blacs_type), INTENT(IN)        :: global_context
    2008             : 
    2009             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
    2010             : 
    2011             :       LOGICAL                                  :: debug
    2012             :       INTEGER                                  :: handle
    2013             : #if defined(__parallel)
    2014             :       INTEGER, DIMENSION(9)                    :: desca, descb
    2015             :       REAL(KIND=dp), DIMENSION(1, 1), TARGET   :: dummy
    2016       16506 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: smat, dmat
    2017             : #endif
    2018             : 
    2019       16506 :       CALL timeset(routineN, handle)
    2020             : 
    2021       16506 :       debug = debug_this_module
    2022             : 
    2023             :       IF (.NOT. cp2k_is_parallel) THEN
    2024             :          CALL cp_fm_to_fm_submat(source, &
    2025             :                                  destination, &
    2026             :                                  nrows, &
    2027             :                                  ncols, &
    2028             :                                  s_firstrow, &
    2029             :                                  s_firstcol, &
    2030             :                                  d_firstrow, &
    2031             :                                  d_firstcol)
    2032             :       ELSE
    2033             : #ifdef __parallel
    2034       16506 :          NULLIFY (smat, dmat)
    2035             :          ! check whether source is available on this process
    2036       16506 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    2037      165060 :             desca = source%matrix_struct%descriptor
    2038       16506 :             IF (source%use_sp) CPABORT("only DP kind implemented")
    2039       16506 :             IF (nrows .GT. source%matrix_struct%nrow_global) &
    2040           0 :                CPABORT("nrows is greater than nrow_global of source")
    2041       16506 :             IF (ncols .GT. source%matrix_struct%ncol_global) &
    2042           0 :                CPABORT("ncols is greater than ncol_global of source")
    2043       16506 :             smat => source%local_data
    2044             :          ELSE
    2045           0 :             desca = -1
    2046           0 :             smat => dummy
    2047             :          END IF
    2048             :          ! check destination is available on this process
    2049       16506 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    2050      165060 :             descb = destination%matrix_struct%descriptor
    2051       16506 :             IF (destination%use_sp) CPABORT("only DP kind implemented")
    2052       16506 :             IF (nrows .GT. destination%matrix_struct%nrow_global) &
    2053           0 :                CPABORT("nrows is greater than nrow_global of destination")
    2054       16506 :             IF (ncols .GT. destination%matrix_struct%ncol_global) &
    2055           0 :                CPABORT("ncols is greater than ncol_global of destination")
    2056       16506 :             dmat => destination%local_data
    2057             :          ELSE
    2058           0 :             descb = -1
    2059           0 :             dmat => dummy
    2060             :          END IF
    2061             :          ! do copy
    2062             : 
    2063             :          CALL pdgemr2d(nrows, &
    2064             :                        ncols, &
    2065             :                        smat, &
    2066             :                        s_firstrow, &
    2067             :                        s_firstcol, &
    2068             :                        desca, &
    2069             :                        dmat, &
    2070             :                        d_firstrow, &
    2071             :                        d_firstcol, &
    2072             :                        descb, &
    2073       16506 :                        global_context%get_handle())
    2074             : #else
    2075             :          MARK_USED(global_context)
    2076             :          CPABORT("this subroutine only supports SCALAPACK")
    2077             : #endif
    2078             :       END IF
    2079             : 
    2080       16506 :       CALL timestop(handle)
    2081             : 
    2082       16506 :    END SUBROUTINE cp_fm_to_fm_submat_general
    2083             : 
    2084             : ! **************************************************************************************************
    2085             : !> \brief ...
    2086             : !> \param matrix ...
    2087             : !> \param irow_global ...
    2088             : !> \param icol_global ...
    2089             : !> \param alpha ...
    2090             : ! **************************************************************************************************
    2091         240 :    SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
    2092             : 
    2093             :       ! Add alpha to the matrix element specified by the global indices
    2094             :       ! irow_global and icol_global
    2095             : 
    2096             :       ! - Creation (05.05.06,MK)
    2097             : 
    2098             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
    2099             :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
    2100             :       REAL(KIND=dp), INTENT(IN)                :: alpha
    2101             : 
    2102             :       INTEGER                                  :: mypcol, myprow, npcol, nprow
    2103         240 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2104             :       TYPE(cp_blacs_env_type), POINTER         :: context
    2105             : #if defined(__parallel)
    2106             :       INTEGER                                  :: icol_local, ipcol, iprow, &
    2107             :                                                   irow_local
    2108             :       INTEGER, DIMENSION(9)                    :: desca
    2109             : #endif
    2110             : 
    2111         240 :       context => matrix%matrix_struct%context
    2112             : 
    2113         240 :       myprow = context%mepos(1)
    2114         240 :       mypcol = context%mepos(2)
    2115             : 
    2116         240 :       nprow = context%num_pe(1)
    2117         240 :       npcol = context%num_pe(2)
    2118             : 
    2119         240 :       a => matrix%local_data
    2120             : 
    2121             : #if defined(__parallel)
    2122             : 
    2123        2400 :       desca(:) = matrix%matrix_struct%descriptor(:)
    2124             : 
    2125             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
    2126         240 :                    irow_local, icol_local, iprow, ipcol)
    2127             : 
    2128         240 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
    2129         120 :          a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
    2130             :       END IF
    2131             : 
    2132             : #else
    2133             : 
    2134             :       a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
    2135             : 
    2136             : #endif
    2137             : 
    2138         240 :    END SUBROUTINE cp_fm_add_to_element
    2139             : 
    2140             : ! **************************************************************************************************
    2141             : !> \brief ...
    2142             : !> \param fm ...
    2143             : !> \param unit ...
    2144             : ! **************************************************************************************************
    2145       78144 :    SUBROUTINE cp_fm_write_unformatted(fm, unit)
    2146             :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2147             :       INTEGER, INTENT(IN)                      :: unit
    2148             : 
    2149             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
    2150             : 
    2151             :       INTEGER                                  :: handle, j, max_block, &
    2152             :                                                   ncol_global, nrow_global
    2153             :       TYPE(mp_para_env_type), POINTER          :: para_env
    2154             : #if defined(__parallel)
    2155             :       INTEGER                                  :: i, i_block, icol_local, &
    2156             :                                                   in, info, ipcol, &
    2157             :                                                   iprow, irow_local, &
    2158             :                                                   mepos, &
    2159             :                                                   num_pe, rb, tag
    2160             :       INTEGER, DIMENSION(9)                    :: desc
    2161       78144 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2162             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2163             :       TYPE(cp_blacs_type) :: ictxt_loc
    2164             :       INTEGER, EXTERNAL                        :: numroc
    2165             : #endif
    2166             : 
    2167       78144 :       CALL timeset(routineN, handle)
    2168             :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2169       78144 :                           para_env=para_env)
    2170             : 
    2171             : #if defined(__parallel)
    2172       78144 :       num_pe = para_env%num_pe
    2173       78144 :       mepos = para_env%mepos
    2174       78144 :       rb = nrow_global
    2175       78144 :       tag = 0
    2176             :       ! get a new context
    2177       78144 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2178       78144 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2179       78144 :       CPASSERT(info == 0)
    2180             :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2181             :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2182      156288 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2183             : 
    2184      312576 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2185             : 
    2186             :          ! do the actual scalapack to cols reordering
    2187             :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2188             :                        fm%matrix_struct%descriptor, &
    2189       78144 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2190             : 
    2191      234432 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2192    54891602 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2193             : 
    2194      343597 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2195      187309 :             i_block = MIN(max_block, ncol_global - i + 1)
    2196             :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2197      187309 :                          irow_local, icol_local, iprow, ipcol)
    2198      187309 :             IF (ipcol == mypcol) THEN
    2199      935351 :                DO j = 1, i_block
    2200   145946249 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2201             :                END DO
    2202             :             END IF
    2203             : 
    2204      187309 :             IF (ipcol == 0) THEN
    2205             :                ! do nothing
    2206             :             ELSE
    2207       64320 :                IF (ipcol == mypcol) THEN
    2208    36106610 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2209             :                END IF
    2210       64320 :                IF (mypcol == 0) THEN
    2211    72181060 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2212             :                END IF
    2213             :             END IF
    2214             : 
    2215      265453 :             IF (unit > 0) THEN
    2216      905375 :                DO j = 1, i_block
    2217      905375 :                   WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
    2218             :                END DO
    2219             :             END IF
    2220             : 
    2221             :          END DO
    2222             :       END ASSOCIATE
    2223       78144 :       DEALLOCATE (vecbuf)
    2224             : 
    2225       78144 :       CALL ictxt_loc%gridexit()
    2226             : 
    2227       78144 :       DEALLOCATE (newdat)
    2228             : 
    2229             : #else
    2230             : 
    2231             :       IF (unit > 0) THEN
    2232             :          DO j = 1, ncol_global
    2233             :             WRITE (unit) fm%local_data(:, j)
    2234             :          END DO
    2235             :       END IF
    2236             : 
    2237             : #endif
    2238       78144 :       CALL timestop(handle)
    2239             : 
    2240      390720 :    END SUBROUTINE cp_fm_write_unformatted
    2241             : 
    2242             : ! **************************************************************************************************
    2243             : !> \brief Write out a full matrix in plain text.
    2244             : !> \param fm the matrix to be outputted
    2245             : !> \param unit the unit number for I/O
    2246             : !> \param header optional header
    2247             : !> \param value_format ...
    2248             : ! **************************************************************************************************
    2249        1226 :    SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
    2250             :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2251             :       INTEGER, INTENT(IN)                      :: unit
    2252             :       CHARACTER(LEN=*), INTENT(IN), OPTIONAL   :: header, value_format
    2253             : 
    2254             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
    2255             : 
    2256             :       CHARACTER(LEN=21)                        :: my_value_format
    2257             :       INTEGER                                  :: handle, i, j, max_block, &
    2258             :                                                   ncol_global, nrow_global
    2259             :       TYPE(mp_para_env_type), POINTER          :: para_env
    2260             : #if defined(__parallel)
    2261             :       INTEGER                                  :: i_block, icol_local, &
    2262             :                                                   in, info, ipcol, &
    2263             :                                                   iprow, irow_local, &
    2264             :                                                   mepos, num_pe, rb, tag, k, &
    2265             :                                                   icol, irow
    2266             :       INTEGER, DIMENSION(9)                    :: desc
    2267        1226 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2268        1226 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2269             :       TYPE(cp_blacs_type) :: ictxt_loc
    2270             :       INTEGER, EXTERNAL                        :: numroc
    2271             : #endif
    2272             : 
    2273        1226 :       CALL timeset(routineN, handle)
    2274             :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2275        1226 :                           para_env=para_env)
    2276             : 
    2277        1226 :       IF (PRESENT(value_format)) THEN
    2278           0 :          CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
    2279           0 :          my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
    2280             :       ELSE
    2281        1226 :          my_value_format = "(I10, I10, ES24.12)"
    2282             :       END IF
    2283             : 
    2284        1226 :       IF (unit > 0) THEN
    2285           5 :          IF (PRESENT(header)) WRITE (unit, *) header
    2286           5 :          WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
    2287             :       END IF
    2288             : 
    2289             : #if defined(__parallel)
    2290        1226 :       num_pe = para_env%num_pe
    2291        1226 :       mepos = para_env%mepos
    2292        1226 :       rb = nrow_global
    2293        1226 :       tag = 0
    2294             :       ! get a new context
    2295        1226 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2296        1226 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2297        1226 :       CPASSERT(info == 0)
    2298             :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2299             :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2300        2452 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2301             : 
    2302        4904 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2303             : 
    2304             :          ! do the actual scalapack to cols reordering
    2305             :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2306             :                        fm%matrix_struct%descriptor, &
    2307        1226 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2308             : 
    2309        3678 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2310        4736 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2311        1226 :          irow = 1
    2312        1226 :          icol = 1
    2313             : 
    2314        4898 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2315        2446 :             i_block = MIN(max_block, ncol_global - i + 1)
    2316             :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2317        2446 :                          irow_local, icol_local, iprow, ipcol)
    2318        2446 :             IF (ipcol == mypcol) THEN
    2319        2469 :                DO j = 1, i_block
    2320        8553 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2321             :                END DO
    2322             :             END IF
    2323             : 
    2324        2446 :             IF (ipcol == 0) THEN
    2325             :                ! do nothing
    2326             :             ELSE
    2327        1220 :                IF (ipcol == mypcol) THEN
    2328        2196 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2329             :                END IF
    2330        1220 :                IF (mypcol == 0) THEN
    2331        3782 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2332             :                END IF
    2333             :             END IF
    2334             : 
    2335        3672 :             IF (unit > 0) THEN
    2336          37 :                DO j = 1, i_block
    2337         647 :                   DO k = (j - 1)*nrow_global + 1, nrow_global*j
    2338         610 :                      WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
    2339         610 :                      irow = irow + 1
    2340         640 :                      IF (irow > nrow_global) THEN
    2341          30 :                         irow = 1
    2342          30 :                         icol = icol + 1
    2343             :                      END IF
    2344             :                   END DO
    2345             :                END DO
    2346             :             END IF
    2347             : 
    2348             :          END DO
    2349             :       END ASSOCIATE
    2350        1226 :       DEALLOCATE (vecbuf)
    2351             : 
    2352        1226 :       CALL ictxt_loc%gridexit()
    2353             : 
    2354        1226 :       DEALLOCATE (newdat)
    2355             : 
    2356             : #else
    2357             : 
    2358             :       IF (unit > 0) THEN
    2359             :          DO j = 1, ncol_global
    2360             :             DO i = 1, nrow_global
    2361             :                WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
    2362             :             END DO
    2363             :          END DO
    2364             :       END IF
    2365             : 
    2366             : #endif
    2367        1226 :       CALL timestop(handle)
    2368             : 
    2369        6130 :    END SUBROUTINE cp_fm_write_formatted
    2370             : 
    2371             : ! **************************************************************************************************
    2372             : !> \brief ...
    2373             : !> \param fm ...
    2374             : !> \param unit ...
    2375             : ! **************************************************************************************************
    2376        1516 :    SUBROUTINE cp_fm_read_unformatted(fm, unit)
    2377             :       TYPE(cp_fm_type), INTENT(INOUT)          :: fm
    2378             :       INTEGER, INTENT(IN)                      :: unit
    2379             : 
    2380             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
    2381             : 
    2382             :       INTEGER                                  :: handle, j, max_block, &
    2383             :                                                   ncol_global, nrow_global
    2384             :       TYPE(mp_para_env_type), POINTER          :: para_env
    2385             : #if defined(__parallel)
    2386             :       INTEGER                                  :: k, n_cols
    2387             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: vecbuf
    2388             : #endif
    2389             : 
    2390        1516 :       CALL timeset(routineN, handle)
    2391             : 
    2392             :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2393        1516 :                           para_env=para_env)
    2394             : 
    2395             : #if defined(__parallel)
    2396             : 
    2397             :       ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
    2398             : 
    2399        6064 :       ALLOCATE (vecbuf(nrow_global, max_block))
    2400             : 
    2401        5684 :       DO j = 1, ncol_global, max_block
    2402             : 
    2403        4168 :          n_cols = MIN(max_block, ncol_global - j + 1)
    2404        4168 :          IF (para_env%mepos == 0) THEN
    2405       32577 :             DO k = 1, n_cols
    2406       32577 :                READ (unit) vecbuf(:, k)
    2407             :             END DO
    2408             :          END IF
    2409    11541680 :          CALL para_env%bcast(vecbuf, 0)
    2410        5684 :          CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
    2411             : 
    2412             :       END DO
    2413             : 
    2414        1516 :       DEALLOCATE (vecbuf)
    2415             : 
    2416             : #else
    2417             : 
    2418             :       DO j = 1, ncol_global
    2419             :          READ (unit) fm%local_data(:, j)
    2420             :       END DO
    2421             : 
    2422             : #endif
    2423             : 
    2424        1516 :       CALL timestop(handle)
    2425             : 
    2426        1516 :    END SUBROUTINE cp_fm_read_unformatted
    2427             : 
    2428             : ! **************************************************************************************************
    2429             : !> \brief ...
    2430             : !> \param mm_type ...
    2431             : ! **************************************************************************************************
    2432        9801 :    SUBROUTINE cp_fm_setup(mm_type)
    2433             :       INTEGER, INTENT(IN)                                :: mm_type
    2434             : 
    2435        9801 :       cp_fm_mm_type = mm_type
    2436        9801 :    END SUBROUTINE cp_fm_setup
    2437             : 
    2438             : ! **************************************************************************************************
    2439             : !> \brief ...
    2440             : !> \return ...
    2441             : ! **************************************************************************************************
    2442     1417028 :    FUNCTION cp_fm_get_mm_type() RESULT(res)
    2443             :       INTEGER                                            :: res
    2444             : 
    2445     1417028 :       res = cp_fm_mm_type
    2446     1417028 :    END FUNCTION
    2447             : 
    2448             : ! **************************************************************************************************
    2449             : !> \brief ...
    2450             : !> \param ictxt ...
    2451             : !> \param prec ...
    2452             : !> \return ...
    2453             : ! **************************************************************************************************
    2454          10 :    FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
    2455             :       INTEGER                                            :: ictxt
    2456             :       CHARACTER(LEN=1)                                   :: prec
    2457             :       INTEGER                                            :: res
    2458             : #if defined(__parallel)
    2459             :       INTEGER                                            :: pilaenv
    2460          10 :       res = pilaenv(ictxt, prec)
    2461             : #else
    2462             :       MARK_USED(ictxt)
    2463             :       MARK_USED(prec)
    2464             :       res = -1
    2465             : #endif
    2466             : 
    2467          10 :    END FUNCTION
    2468             : 
    2469           0 : END MODULE cp_fm_types

Generated by: LCOV version 1.15