LCOV - code coverage report
Current view: top level - src - cp_dbcsr_operations.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b8e0b09) Lines: 364 456 79.8 %
Date: 2024-08-31 06:31:37 Functions: 25 32 78.1 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief   DBCSR operations in CP2K
      10             : !> \author  Urban Borstnik
      11             : !> \date    2009-05-12
      12             : !> \version 0.8
      13             : !>
      14             : !> <b>Modification history:</b>
      15             : !> - Created 2009-05-12
      16             : !> - Generalized sm_fm_mulitply for matrices w/ different row/col block size (A. Bussy, 11.2018)
      17             : ! **************************************************************************************************
      18             : MODULE cp_dbcsr_operations
      19             :    USE mathlib, ONLY: lcm, gcd
      20             :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21             :    USE cp_cfm_types, ONLY: cp_cfm_type
      22             :    USE cp_dbcsr_api, ONLY: dbcsr_distribution_get, &
      23             :                            dbcsr_convert_sizes_to_offsets, dbcsr_add, &
      24             :                            dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, &
      25             :                            dbcsr_deallocate_matrix, &
      26             :                            dbcsr_desymmetrize, dbcsr_distribution_new, &
      27             :                            dbcsr_get_data_type, dbcsr_get_info, dbcsr_get_matrix_type, &
      28             :                            dbcsr_iterator_type, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      29             :                            dbcsr_iterator_start, dbcsr_iterator_stop, &
      30             :                            dbcsr_multiply, dbcsr_norm, dbcsr_p_type, dbcsr_release, &
      31             :                            dbcsr_reserve_all_blocks, dbcsr_scale, dbcsr_type, &
      32             :                            dbcsr_valid_index, dbcsr_verify_matrix, &
      33             :                            dbcsr_distribution_type, dbcsr_distribution_release, &
      34             :                            dbcsr_norm_frobenius, &
      35             :                            dbcsr_type_antisymmetric, dbcsr_type_complex_8, dbcsr_type_no_symmetry, dbcsr_type_real_8, &
      36             :                            dbcsr_type_symmetric
      37             :    USE cp_fm_basic_linalg, ONLY: cp_fm_gemm
      38             :    USE cp_fm_struct, ONLY: cp_fm_struct_create, &
      39             :                            cp_fm_struct_release, &
      40             :                            cp_fm_struct_type
      41             :    USE cp_fm_types, ONLY: cp_fm_create, &
      42             :                           cp_fm_get_info, &
      43             :                           cp_fm_release, &
      44             :                           cp_fm_to_fm, &
      45             :                           cp_fm_type
      46             :    USE message_passing, ONLY: mp_para_env_type
      47             :    USE distribution_2d_types, ONLY: distribution_2d_get, &
      48             :                                     distribution_2d_type
      49             :    USE kinds, ONLY: dp, default_string_length
      50             :    USE message_passing, ONLY: mp_comm_type
      51             : 
      52             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      53             : #include "base/base_uses.f90"
      54             : 
      55             :    IMPLICIT NONE
      56             : 
      57             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_dbcsr_operations'
      58             :    LOGICAL, PARAMETER :: debug_mod = .FALSE.
      59             : 
      60             :    INTEGER, SAVE, PUBLIC :: max_elements_per_block = 32
      61             : 
      62             :    PUBLIC :: dbcsr_multiply_local
      63             : 
      64             :    ! CP2K API emulation
      65             :    PUBLIC :: copy_fm_to_dbcsr, copy_dbcsr_to_fm, &
      66             :              copy_dbcsr_to_cfm, copy_cfm_to_dbcsr, &
      67             :              cp_dbcsr_sm_fm_multiply, cp_dbcsr_plus_fm_fm_t, &
      68             :              copy_dbcsr_to_fm_bc, copy_fm_to_dbcsr_bc, cp_fm_to_dbcsr_row_template, &
      69             :              cp_dbcsr_m_by_n_from_template, cp_dbcsr_m_by_n_from_row_template, &
      70             :              dbcsr_create_dist_r_unrot
      71             : 
      72             :    ! distribution_2d_type compatibility
      73             :    PUBLIC :: cp_dbcsr_dist2d_to_dist
      74             : 
      75             :    PUBLIC :: dbcsr_copy_columns_hack
      76             : 
      77             :    ! matrix set
      78             :    PUBLIC :: dbcsr_allocate_matrix_set
      79             :    PUBLIC :: dbcsr_deallocate_matrix_set
      80             : 
      81             :    INTERFACE dbcsr_allocate_matrix_set
      82             :       #:for ii in range(1, 6)
      83             :          MODULE PROCEDURE allocate_dbcsr_matrix_set_${ii}$d
      84             :       #:endfor
      85             :    END INTERFACE
      86             : 
      87             :    INTERFACE dbcsr_deallocate_matrix_set
      88             :       #:for ii in range(1, 6)
      89             :          MODULE PROCEDURE deallocate_dbcsr_matrix_set_${ii}$d
      90             :       #:endfor
      91             :    END INTERFACE
      92             : 
      93             :    PRIVATE
      94             : 
      95             : CONTAINS
      96             : 
      97             :    #:for fm, type, constr in [("fm", "REAL", "REAL"), ("cfm", "COMPLEX", "CMPLX")]
      98             : 
      99             : ! **************************************************************************************************
     100             : !> \brief   Copy a BLACS matrix to a dbcsr matrix.
     101             : !>
     102             : !>          real_matrix=beta*real_matrix+alpha*fm
     103             : !>          beta defaults to 0, alpha to 1
     104             : !> \param[in] fm              full matrix
     105             : !> \param[out] matrix         DBCSR matrix
     106             : !> \param[in] keep_sparsity   (optional) retains the sparsity of the input
     107             : !>                            matrix
     108             : !> \date    2009-10-13
     109             : !> \par History
     110             : !>          2009-10-13 rewritten based on copy_dbcsr_to_fm
     111             : !> \author  Urban Borstnik
     112             : !> \version 2.0
     113             : ! **************************************************************************************************
     114     1049232 :       SUBROUTINE copy_${fm}$_to_dbcsr(fm, matrix, keep_sparsity)
     115             :          TYPE(cp_${fm}$_type), INTENT(IN)                   :: fm
     116             :          TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     117             :          LOGICAL, INTENT(IN), OPTIONAL                      :: keep_sparsity
     118             : 
     119             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_${fm}$_to_dbcsr'
     120             : 
     121             :          TYPE(dbcsr_type)                                   :: bc_mat, redist_mat
     122             :          INTEGER                                            :: handle
     123             :          LOGICAL                                            :: my_keep_sparsity
     124             : 
     125     1049232 :          CALL timeset(routineN, handle)
     126             : 
     127     1049232 :          my_keep_sparsity = .FALSE.
     128     1049232 :          IF (PRESENT(keep_sparsity)) my_keep_sparsity = keep_sparsity
     129             : 
     130     1049232 :          CALL copy_${fm}$_to_dbcsr_bc(fm, bc_mat)
     131             : 
     132     1049232 :          IF (my_keep_sparsity) THEN
     133       82410 :             CALL dbcsr_create(redist_mat, template=matrix)
     134       82410 :             CALL dbcsr_complete_redistribute(bc_mat, redist_mat)
     135       82410 :             CALL dbcsr_copy(matrix, redist_mat, keep_sparsity=.TRUE.)
     136       82410 :             CALL dbcsr_release(redist_mat)
     137             :          ELSE
     138      966822 :             CALL dbcsr_complete_redistribute(bc_mat, matrix)
     139             :          END IF
     140             : 
     141     1049232 :          CALL dbcsr_release(bc_mat)
     142             : 
     143     1049232 :          CALL timestop(handle)
     144     1049232 :       END SUBROUTINE copy_${fm}$_to_dbcsr
     145             : 
     146             : ! **************************************************************************************************
     147             : !> \brief   Copy a BLACS matrix to a dbcsr matrix with a special block-cyclic distribution,
     148             : !>           which requires no complete redistribution.
     149             : !> \param fm ...
     150             : !> \param bc_mat ...
     151             : ! **************************************************************************************************
     152     1049602 :       SUBROUTINE copy_${fm}$_to_dbcsr_bc(fm, bc_mat)
     153             :          TYPE(cp_${fm}$_type), INTENT(IN)                    :: fm
     154             :          TYPE(dbcsr_type), INTENT(INOUT)                     :: bc_mat
     155             : 
     156             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_${fm}$_to_dbcsr_bc'
     157             : 
     158             :          INTEGER :: col, handle, ncol_block, ncol_global, nrow_block, nrow_global, row
     159     1049602 :          INTEGER, ALLOCATABLE, DIMENSION(:)                  :: first_col, first_row, last_col, last_row
     160     1049602 :          INTEGER, DIMENSION(:), POINTER                      :: col_blk_size, row_blk_size
     161     1049602 :          ${type}$ (KIND=dp), DIMENSION(:, :), POINTER        :: fm_block, dbcsr_block
     162             :          TYPE(dbcsr_distribution_type)                       :: bc_dist
     163             :          TYPE(dbcsr_iterator_type)                           :: iter
     164     1049602 :          INTEGER, DIMENSION(:, :), POINTER                   :: pgrid
     165             : 
     166     1049602 :          CALL timeset(routineN, handle)
     167             : 
     168             :          #:if (type=="REAL")
     169     1047064 :             IF (fm%use_sp) CPABORT("copy_${fm}$_to_dbcsr_bc: single precision not supported")
     170             :          #:endif
     171             : 
     172             :          ! Create processor grid
     173     1049602 :          pgrid => fm%matrix_struct%context%blacs2mpi
     174             : 
     175             :          ! Create a block-cyclic distribution compatible with the FM matrix.
     176     1049602 :          nrow_block = fm%matrix_struct%nrow_block
     177     1049602 :          ncol_block = fm%matrix_struct%ncol_block
     178     1049602 :          nrow_global = fm%matrix_struct%nrow_global
     179     1049602 :          ncol_global = fm%matrix_struct%ncol_global
     180     1049602 :          NULLIFY (col_blk_size, row_blk_size)
     181             :          CALL dbcsr_create_dist_block_cyclic(bc_dist, &
     182             :                                              nrows=nrow_global, ncolumns=ncol_global, & ! Actual full matrix size
     183             :                                              nrow_block=nrow_block, ncol_block=ncol_block, & ! BLACS parameters
     184             :                                              group_handle=fm%matrix_struct%para_env%get_handle(), pgrid=pgrid, &
     185     1049602 :                                              row_blk_sizes=row_blk_size, col_blk_sizes=col_blk_size) ! block-cyclic row/col sizes
     186             : 
     187             :          ! Create the block-cyclic DBCSR matrix
     188             :          CALL dbcsr_create(bc_mat, "Block-cyclic ", bc_dist, &
     189             :                            dbcsr_type_no_symmetry, row_blk_size, col_blk_size, nze=0, &
     190     1049602 :                            reuse_arrays=.TRUE., data_type=dbcsr_type_${type.lower()}$_8)
     191     1049602 :          CALL dbcsr_distribution_release(bc_dist)
     192             : 
     193             :          ! allocate all blocks
     194     1049602 :          CALL dbcsr_reserve_all_blocks(bc_mat)
     195             : 
     196     1049602 :          CALL calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     197             : 
     198             :          ! Copy the FM data to the block-cyclic DBCSR matrix.  This step
     199             :          ! could be skipped with appropriate DBCSR index manipulation.
     200     1049602 :          fm_block => fm%local_data
     201             : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(iter, row, col, dbcsr_block) &
     202     1049602 : !$OMP SHARED(bc_mat, last_row, first_row, last_col, first_col, fm_block)
     203             :          CALL dbcsr_iterator_start(iter, bc_mat)
     204             :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     205             :             CALL dbcsr_iterator_next_block(iter, row, col, dbcsr_block)
     206             :             dbcsr_block(:, :) = fm_block(first_row(row):last_row(row), first_col(col):last_col(col))
     207             :          END DO
     208             :          CALL dbcsr_iterator_stop(iter)
     209             : !$OMP END PARALLEL
     210             : 
     211     1049602 :          CALL timestop(handle)
     212     2099204 :       END SUBROUTINE copy_${fm}$_to_dbcsr_bc
     213             : 
     214             : ! **************************************************************************************************
     215             : !> \brief Copy a DBCSR matrix to a BLACS matrix
     216             : !> \param[in] matrix          DBCSR matrix
     217             : !> \param[out] fm             full matrix
     218             : ! **************************************************************************************************
     219     3910988 :       SUBROUTINE copy_dbcsr_to_${fm}$ (matrix, fm)
     220             :          TYPE(dbcsr_type), INTENT(IN)                       :: matrix
     221             :          TYPE(cp_${fm}$_type), INTENT(IN)                   :: fm
     222             : 
     223             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_dbcsr_to_${fm}$'
     224             : 
     225      977747 :          INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
     226             :          INTEGER :: handle, ncol_block, nfullcols_total, group_handle, &
     227             :                     nfullrows_total, nrow_block
     228             :          TYPE(dbcsr_type)                                   :: bc_mat, matrix_nosym
     229             :          TYPE(dbcsr_distribution_type)                      :: dist, bc_dist
     230             :          CHARACTER(len=default_string_length)               :: name
     231      977747 :          INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     232             : 
     233      977747 :          CALL timeset(routineN, handle)
     234             : 
     235             :          ! check compatibility
     236             :          CALL dbcsr_get_info(matrix, &
     237             :                              name=name, &
     238             :                              distribution=dist, &
     239             :                              nfullrows_total=nfullrows_total, &
     240      977747 :                              nfullcols_total=nfullcols_total)
     241             : 
     242      977747 :          CPASSERT(fm%matrix_struct%nrow_global == nfullrows_total)
     243      977747 :          CPASSERT(fm%matrix_struct%ncol_global == nfullcols_total)
     244             : 
     245             :          ! info about the full matrix
     246      977747 :          nrow_block = fm%matrix_struct%nrow_block
     247      977747 :          ncol_block = fm%matrix_struct%ncol_block
     248             : 
     249             :          ! Convert DBCSR to a block-cyclic
     250      977747 :          NULLIFY (col_blk_size, row_blk_size)
     251      977747 :          CALL dbcsr_distribution_get(dist, group=group_handle, pgrid=pgrid)
     252             :          CALL dbcsr_create_dist_block_cyclic(bc_dist, &
     253             :                                              nrows=nfullrows_total, ncolumns=nfullcols_total, &
     254             :                                              nrow_block=nrow_block, ncol_block=ncol_block, &
     255             :                                              group_handle=group_handle, pgrid=pgrid, &
     256      977747 :                                              row_blk_sizes=row_blk_size, col_blk_sizes=col_blk_size)
     257             : 
     258             :          CALL dbcsr_create(bc_mat, "Block-cyclic"//name, bc_dist, &
     259             :                            dbcsr_type_no_symmetry, row_blk_size, col_blk_size, &
     260             :                            nze=0, data_type=dbcsr_get_data_type(matrix), &
     261      977747 :                            reuse_arrays=.TRUE.)
     262      977747 :          CALL dbcsr_distribution_release(bc_dist)
     263             : 
     264      977747 :          CALL dbcsr_create(matrix_nosym, template=matrix, matrix_type="N")
     265      977747 :          CALL dbcsr_desymmetrize(matrix, matrix_nosym)
     266      977747 :          CALL dbcsr_complete_redistribute(matrix_nosym, bc_mat)
     267      977747 :          CALL dbcsr_release(matrix_nosym)
     268             : 
     269      977747 :          CALL copy_dbcsr_to_${fm}$_bc(bc_mat, fm)
     270             : 
     271      977747 :          CALL dbcsr_release(bc_mat)
     272             : 
     273      977747 :          CALL timestop(handle)
     274      977747 :       END SUBROUTINE copy_dbcsr_to_${fm}$
     275             : 
     276             : ! **************************************************************************************************
     277             : !> \brief Copy a DBCSR_BLACS matrix to a BLACS matrix
     278             : !> \param bc_mat DBCSR matrix
     279             : !> \param[out] fm             full matrix
     280             : ! **************************************************************************************************
     281      977747 :       SUBROUTINE copy_dbcsr_to_${fm}$_bc(bc_mat, fm)
     282             :          TYPE(dbcsr_type), INTENT(IN)                       :: bc_mat
     283             :          TYPE(cp_${fm}$_type), INTENT(IN)                   :: fm
     284             : 
     285             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_dbcsr_to_${fm}$_bc'
     286             : 
     287             :          INTEGER :: col, handle, row
     288      977747 :          INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_col, first_row, last_col, last_row
     289      977747 :          ${type}$ (KIND=dp), DIMENSION(:, :), POINTER       :: dbcsr_block, fm_block
     290             :          TYPE(dbcsr_iterator_type)                          :: iter
     291             : 
     292      977747 :          CALL timeset(routineN, handle)
     293             : 
     294             :          #:if (type=="REAL")
     295      975209 :             IF (fm%use_sp) CPABORT("copy_dbcsr_to_${fm}$_bc: single precision not supported")
     296             :          #:endif
     297             : 
     298      977747 :          CALL calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     299             : 
     300             :          ! Now copy data to the FM matrix
     301      977747 :          fm_block => fm%local_data
     302   365229811 :          fm_block = ${constr}$ (0.0, KIND=dp)
     303             : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(iter, row, col, dbcsr_block) &
     304      977747 : !$OMP SHARED(bc_mat, last_row, first_row, last_col, first_col, fm_block)
     305             :          CALL dbcsr_iterator_start(iter, bc_mat)
     306             :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     307             :             CALL dbcsr_iterator_next_block(iter, row, col, dbcsr_block)
     308             :             fm_block(first_row(row):last_row(row), first_col(col):last_col(col)) = dbcsr_block(:, :)
     309             :          END DO
     310             :          CALL dbcsr_iterator_stop(iter)
     311             : !$OMP END PARALLEL
     312             : 
     313      977747 :          CALL timestop(handle)
     314     1955494 :       END SUBROUTINE copy_dbcsr_to_${fm}$_bc
     315             : 
     316             :    #:endfor
     317             : 
     318             : ! **************************************************************************************************
     319             : !> \brief Helper routine used to copy blocks from DBCSR into FM matrices and vice versa
     320             : !> \param bc_mat ...
     321             : !> \param first_row ...
     322             : !> \param last_row ...
     323             : !> \param first_col ...
     324             : !> \param last_col ...
     325             : !> \author Ole Schuett
     326             : ! **************************************************************************************************
     327     2027349 :    SUBROUTINE calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     328             :       TYPE(dbcsr_type), INTENT(IN)                    :: bc_mat
     329             :       INTEGER :: col, nblkcols_local, nblkcols_total, nblkrows_local, nblkrows_total, row
     330             :       INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: first_col, first_row, last_col, &
     331             :                                                             last_row
     332             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_col_sizes, local_row_sizes
     333     2027349 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, local_cols, local_rows, &
     334     2027349 :                                                             row_blk_size
     335             : 
     336             :       CALL dbcsr_get_info(bc_mat, &
     337             :                           nblkrows_total=nblkrows_total, &
     338             :                           nblkcols_total=nblkcols_total, &
     339             :                           nblkrows_local=nblkrows_local, &
     340             :                           nblkcols_local=nblkcols_local, &
     341             :                           local_rows=local_rows, &
     342             :                           local_cols=local_cols, &
     343             :                           row_blk_size=row_blk_size, &
     344     2027349 :                           col_blk_size=col_blk_size)
     345             : 
     346             :       ! calculate first_row and last_row
     347     6081535 :       ALLOCATE (local_row_sizes(nblkrows_total))
     348     6051824 :       local_row_sizes(:) = 0
     349     2027349 :       IF (nblkrows_local .GE. 1) THEN
     350     4059655 :          DO row = 1, nblkrows_local
     351     4059655 :             local_row_sizes(local_rows(row)) = row_blk_size(local_rows(row))
     352             :          END DO
     353             :       END IF
     354    10134697 :       ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total))
     355     2027349 :       CALL dbcsr_convert_sizes_to_offsets(local_row_sizes, first_row, last_row)
     356     2027349 :       DEALLOCATE (local_row_sizes)
     357             : 
     358             :       ! calculate first_col and last_col
     359     6080265 :       ALLOCATE (local_col_sizes(nblkcols_total))
     360     4703472 :       local_col_sizes(:) = 0
     361     2027349 :       IF (nblkcols_local .GE. 1) THEN
     362     4701690 :          DO col = 1, nblkcols_local
     363     4701690 :             local_col_sizes(local_cols(col)) = col_blk_size(local_cols(col))
     364             :          END DO
     365             :       END IF
     366    10129617 :       ALLOCATE (first_col(nblkcols_total), last_col(nblkcols_total))
     367     2027349 :       CALL dbcsr_convert_sizes_to_offsets(local_col_sizes, first_col, last_col)
     368     2027349 :       DEALLOCATE (local_col_sizes)
     369             : 
     370     2027349 :    END SUBROUTINE calculate_fm_block_ranges
     371             : 
     372             : ! **************************************************************************************************
     373             : !> \brief hack for dbcsr_copy_columns
     374             : !> \param matrix_b ...
     375             : !> \param matrix_a ...
     376             : !> \param ncol ...
     377             : !> \param source_start ...
     378             : !> \param target_start ...
     379             : !> \param para_env ...
     380             : !> \param blacs_env ...
     381             : !> \author vw
     382             : ! **************************************************************************************************
     383        9016 :    SUBROUTINE dbcsr_copy_columns_hack(matrix_b, matrix_a, &
     384             :                                       ncol, source_start, target_start, para_env, blacs_env)
     385             : 
     386             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix_b
     387             :       TYPE(dbcsr_type), INTENT(IN)                    :: matrix_a
     388             :       INTEGER, INTENT(IN)                                :: ncol, source_start, target_start
     389             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     390             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     391             : 
     392             :       INTEGER                                            :: nfullcols_total, nfullrows_total
     393             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     394             :       TYPE(cp_fm_type)                         :: fm_matrix_a, fm_matrix_b
     395             : 
     396        2254 :       NULLIFY (fm_struct)
     397        2254 :       CALL dbcsr_get_info(matrix_a, nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
     398             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nfullrows_total, &
     399        2254 :                                ncol_global=nfullcols_total, para_env=para_env)
     400        2254 :       CALL cp_fm_create(fm_matrix_a, fm_struct, name="fm_matrix_a")
     401        2254 :       CALL cp_fm_struct_release(fm_struct)
     402             : 
     403        2254 :       CALL dbcsr_get_info(matrix_b, nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
     404             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nfullrows_total, &
     405        2254 :                                ncol_global=nfullcols_total, para_env=para_env)
     406        2254 :       CALL cp_fm_create(fm_matrix_b, fm_struct, name="fm_matrix_b")
     407        2254 :       CALL cp_fm_struct_release(fm_struct)
     408             : 
     409        2254 :       CALL copy_dbcsr_to_fm(matrix_a, fm_matrix_a)
     410        2254 :       CALL copy_dbcsr_to_fm(matrix_b, fm_matrix_b)
     411             : 
     412        2254 :       CALL cp_fm_to_fm(fm_matrix_a, fm_matrix_b, ncol, source_start, target_start)
     413             : 
     414        2254 :       CALL copy_fm_to_dbcsr(fm_matrix_b, matrix_b)
     415             : 
     416        2254 :       CALL cp_fm_release(fm_matrix_a)
     417        2254 :       CALL cp_fm_release(fm_matrix_b)
     418             : 
     419        2254 :    END SUBROUTINE dbcsr_copy_columns_hack
     420             : 
     421             : ! **************************************************************************************************
     422             : !> \brief Creates a DBCSR distribution from a distribution_2d
     423             : !> \param[in] dist2d          distribution_2d
     424             : !> \param[out] dist           DBCSR distribution
     425             : !> \par History
     426             : !>    move form dbcsr_operation 01.2010
     427             : ! **************************************************************************************************
     428        8650 :    SUBROUTINE cp_dbcsr_dist2d_to_dist(dist2d, dist)
     429             :       TYPE(distribution_2d_type), INTENT(IN), TARGET     :: dist2d
     430             :       TYPE(dbcsr_distribution_type), INTENT(OUT)          :: dist
     431             : 
     432        8650 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid, col_dist_data, row_dist_data
     433             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     434             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     435             :       TYPE(distribution_2d_type), POINTER                :: dist2d_p
     436        8650 :       INTEGER, DIMENSION(:), POINTER         :: row_dist, col_dist
     437             : 
     438        8650 :       dist2d_p => dist2d
     439             :       CALL distribution_2d_get(dist2d_p, &
     440             :                                row_distribution=row_dist_data, &
     441             :                                col_distribution=col_dist_data, &
     442        8650 :                                blacs_env=blacs_env)
     443        8650 :       CALL blacs_env%get(para_env=para_env, blacs2mpi=pgrid)
     444             : 
     445             :       ! map to 1D arrays
     446        8650 :       row_dist => row_dist_data(:, 1)
     447        8650 :       col_dist => col_dist_data(:, 1)
     448             :       !row_cluster => row_dist_data(:, 2)
     449             :       !col_cluster => col_dist_data(:, 2)
     450             : 
     451             :       CALL dbcsr_distribution_new(dist, &
     452             :                                   group=para_env%get_handle(), pgrid=pgrid, &
     453             :                                   row_dist=row_dist, &
     454        8650 :                                   col_dist=col_dist)
     455             : 
     456        8650 :    END SUBROUTINE cp_dbcsr_dist2d_to_dist
     457             : 
     458             : ! **************************************************************************************************
     459             : !> \brief multiply a dbcsr with a replicated array
     460             : !>        c = alpha_scalar * A (dbscr) * b + c
     461             : !> \param[in] matrix_a DBSCR matrxx
     462             : !> \param[in]  vec_b        vectors b
     463             : !> \param[inout] vec_c      vectors c
     464             : !> \param[in]  ncol         nbr of columns
     465             : !> \param[in]  alpha        alpha
     466             : !>
     467             : ! **************************************************************************************************
     468           0 :    SUBROUTINE dbcsr_multiply_local(matrix_a, vec_b, vec_c, ncol, alpha)
     469             :       TYPE(dbcsr_type), INTENT(IN)                    :: matrix_a
     470             :       REAL(dp), DIMENSION(:, :), INTENT(IN)              :: vec_b
     471             :       REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: vec_c
     472             :       INTEGER, INTENT(in), OPTIONAL                      :: ncol
     473             :       REAL(dp), INTENT(IN), OPTIONAL                     :: alpha
     474             : 
     475             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_multiply_local'
     476             : 
     477             :       INTEGER                                            :: blk, col, coloff, my_ncol, row, rowoff, &
     478             :                                                             timing_handle
     479             :       LOGICAL                                            :: has_symm
     480             :       REAL(dp)                                           :: my_alpha, my_alpha2
     481           0 :       REAL(dp), DIMENSION(:, :), POINTER                 :: data_d
     482             :       TYPE(dbcsr_iterator_type)                            :: iter
     483             : 
     484           0 :       CALL timeset(routineN, timing_handle)
     485             : 
     486           0 :       my_alpha = 1.0_dp
     487           0 :       IF (PRESENT(alpha)) my_alpha = alpha
     488             : 
     489           0 :       my_ncol = SIZE(vec_b, 2)
     490           0 :       IF (PRESENT(ncol)) my_ncol = ncol
     491             : 
     492           0 :       my_alpha2 = 0.0_dp
     493           0 :       IF (dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_symmetric) my_alpha2 = my_alpha
     494           0 :       IF (dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_antisymmetric) my_alpha2 = -my_alpha
     495             : 
     496             :       has_symm = (dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_symmetric .OR. &
     497           0 :                   dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_antisymmetric)
     498             : 
     499             : !$OMP     PARALLEL DEFAULT(NONE) SHARED(matrix_a,vec_b,vec_c,ncol,my_alpha2,my_alpha,my_ncol,has_symm) &
     500           0 : !$OMP              PRIVATE(iter,row,col,data_d,blk,rowoff,coloff)
     501             :       CALL dbcsr_iterator_start(iter, matrix_a, read_only=.TRUE., dynamic=.TRUE., dynamic_byrows=.TRUE.)
     502             :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     503             :          CALL dbcsr_iterator_next_block(iter, row, col, data_d, blk, row_offset=rowoff, col_offset=coloff)
     504             :          IF (my_ncol .NE. 1) THEN
     505             :             CALL dgemm('N', 'N', &
     506             :                        SIZE(data_d, 1), my_ncol, SIZE(data_d, 2), &
     507             :                        my_alpha, data_d(1, 1), SIZE(data_d, 1), &
     508             :                        vec_b(coloff, 1), SIZE(vec_b, 1), &
     509             :                        1.0_dp, vec_c(rowoff, 1), SIZE(vec_c, 1))
     510             :          ELSE
     511             :             CALL dgemv('N', SIZE(data_d, 1), SIZE(data_d, 2), &
     512             :                        my_alpha, data_d(1, 1), SIZE(data_d, 1), &
     513             :                        vec_b(coloff, 1), 1, &
     514             :                        1.0_dp, vec_c(rowoff, 1), 1)
     515             :          END IF
     516             :       END DO
     517             :       CALL dbcsr_iterator_stop(iter)
     518             : !$OMP     END PARALLEL
     519             : 
     520             :       ! FIXME ... in the symmetric case, the writes to vec_c depend on the column, not the row. This makes OMP-ing more difficult
     521             :       ! needs e.g. a buffer for vec_c and a reduction of that buffer.
     522           0 :       IF (has_symm) THEN
     523           0 :          CALL dbcsr_iterator_start(iter, matrix_a)
     524           0 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     525           0 :             CALL dbcsr_iterator_next_block(iter, row, col, data_d, blk, row_offset=rowoff, col_offset=coloff)
     526           0 :             IF (row .NE. col) THEN
     527           0 :                IF (my_ncol .NE. 1) THEN
     528             :                   CALL dgemm('T', 'N', &
     529             :                              SIZE(data_d, 2), my_ncol, SIZE(data_d, 1), &
     530             :                              my_alpha2, data_d(1, 1), SIZE(data_d, 1), &
     531             :                              vec_b(rowoff, 1), SIZE(vec_b, 1), &
     532           0 :                              1.0_dp, vec_c(coloff, 1), SIZE(vec_c, 1))
     533             :                ELSE
     534             :                   CALL dgemv('T', SIZE(data_d, 1), SIZE(data_d, 2), &
     535             :                              my_alpha2, data_d(1, 1), SIZE(data_d, 1), &
     536             :                              vec_b(rowoff, 1), 1, &
     537           0 :                              1.0_dp, vec_c(coloff, 1), 1)
     538             :                END IF
     539             :             END IF
     540             :          END DO
     541           0 :          CALL dbcsr_iterator_stop(iter)
     542             :       END IF
     543             : 
     544           0 :       CALL timestop(timing_handle)
     545           0 :    END SUBROUTINE dbcsr_multiply_local
     546             : 
     547             : ! **************************************************************************************************
     548             : !> \brief multiply a dbcsr with a fm matrix
     549             : !>
     550             : !> For backwards compatibility with BLAS XGEMM, this routine supports
     551             : !> the multiplication of matrices with incompatible dimensions.
     552             : !>
     553             : !> \param[in]  matrix         DBCSR matrix
     554             : !> \param fm_in full matrix
     555             : !> \param fm_out full matrix
     556             : !> \param[in]  ncol           nbr of columns
     557             : !> \param[in]  alpha          alpha
     558             : !> \param[in]  beta           beta
     559             : !>
     560             : ! **************************************************************************************************
     561     2691942 :    SUBROUTINE cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
     562             :       TYPE(dbcsr_type), INTENT(IN)                    :: matrix
     563             :       TYPE(cp_fm_type), INTENT(IN)                          :: fm_in, fm_out
     564             :       INTEGER, INTENT(IN)                                :: ncol
     565             :       REAL(dp), INTENT(IN), OPTIONAL                     :: alpha, beta
     566             : 
     567             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_sm_fm_multiply'
     568             : 
     569             :       INTEGER                                            :: k_in, k_out, timing_handle, &
     570             :                                                             timing_handle_mult, &
     571             :                                                             a_ncol, a_nrow, b_ncol, b_nrow, c_ncol, c_nrow
     572      448657 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_right_in, &
     573      448657 :                                                             col_blk_size_right_out, row_blk_size, &
     574             :                                                             !row_cluster, col_cluster,&
     575      448657 :                                                             row_dist, col_dist, col_blk_size
     576             :       TYPE(dbcsr_type)                                   :: in, out
     577             :       REAL(dp)                                           :: my_alpha, my_beta
     578             :       TYPE(dbcsr_distribution_type)                       :: dist, dist_right_in, product_dist
     579             : 
     580      448657 :       CALL timeset(routineN, timing_handle)
     581             : 
     582      448657 :       my_alpha = 1.0_dp
     583      448657 :       my_beta = 0.0_dp
     584      448657 :       IF (PRESENT(alpha)) my_alpha = alpha
     585      448657 :       IF (PRESENT(beta)) my_beta = beta
     586             : 
     587             :       ! TODO
     588      448657 :       CALL cp_fm_get_info(fm_in, ncol_global=b_ncol, nrow_global=b_nrow)
     589      448657 :       CALL cp_fm_get_info(fm_out, ncol_global=c_ncol, nrow_global=c_nrow)
     590      448657 :       CALL dbcsr_get_info(matrix, nfullrows_total=a_nrow, nfullcols_total=a_ncol)
     591             :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: A ", a_nrow, "x", a_ncol
     592             :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: B ", b_nrow, "x", b_ncol
     593             :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: C ", c_nrow, "x", c_ncol
     594             : 
     595      448657 :       CALL cp_fm_get_info(fm_out, ncol_global=k_out)
     596             : 
     597      448657 :       CALL cp_fm_get_info(fm_in, ncol_global=k_in)
     598             :       !write(*,*)routineN//" -----------------------------------"
     599             :       !IF (k_in .NE. k_out) &
     600             :       !   WRITE(*,'(3(A,I5,1X),2(A,F5.2,1X))')&
     601             :       !   routineN//" ncol", ncol,'k_in',k_in,'k_out',k_out,&
     602             :       !   'alpha',my_alpha,'beta',my_beta
     603             : 
     604      448657 :       IF (ncol .GT. 0 .AND. k_out .GT. 0 .AND. k_in .GT. 0) THEN
     605      447631 :          CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, col_blk_size=col_blk_size, distribution=dist)
     606      447631 :          CALL dbcsr_create_dist_r_unrot(dist_right_in, dist, k_in, col_blk_size_right_in)
     607             : 
     608             :          CALL dbcsr_create(in, "D", dist_right_in, dbcsr_type_no_symmetry, &
     609      447631 :                            col_blk_size, col_blk_size_right_in, nze=0)
     610             : 
     611      447631 :          CALL dbcsr_distribution_get(dist, row_dist=row_dist)
     612      447631 :          CALL dbcsr_distribution_get(dist_right_in, col_dist=col_dist)
     613             :          CALL dbcsr_distribution_new(product_dist, template=dist, &
     614      447631 :                                      row_dist=row_dist, col_dist=col_dist)
     615     1342893 :          ALLOCATE (col_blk_size_right_out(SIZE(col_blk_size_right_in)))
     616     1817284 :          col_blk_size_right_out = col_blk_size_right_in
     617      447631 :          CALL match_col_sizes(col_blk_size_right_out, col_blk_size_right_in, k_out)
     618             : 
     619             :          !if (k_in .ne. k_out) then
     620             :          !   write(*,*)routineN//" in cs", col_blk_size_right_in
     621             :          !   write(*,*)routineN//" out cs", col_blk_size_right_out
     622             :          !endif
     623             : 
     624             :          CALL dbcsr_create(out, "D", product_dist, dbcsr_type_no_symmetry, &
     625      447631 :                            row_blk_size, col_blk_size_right_out, nze=0)
     626             : 
     627      447631 :          CALL copy_fm_to_dbcsr(fm_in, in)
     628      447631 :          IF (ncol .NE. k_out .OR. my_beta .NE. 0.0_dp) &
     629      102994 :             CALL copy_fm_to_dbcsr(fm_out, out)
     630             : 
     631      447631 :          CALL timeset(routineN//'_core', timing_handle_mult)
     632             :          CALL dbcsr_multiply("N", "N", my_alpha, matrix, in, my_beta, out, &
     633      447631 :                              last_column=ncol)
     634      447631 :          CALL timestop(timing_handle_mult)
     635             : 
     636      447631 :          CALL copy_dbcsr_to_fm(out, fm_out)
     637             : 
     638      447631 :          CALL dbcsr_release(in)
     639      447631 :          CALL dbcsr_release(out)
     640      447631 :          DEALLOCATE (col_blk_size_right_in, col_blk_size_right_out)
     641      447631 :          CALL dbcsr_distribution_release(dist_right_in)
     642     1790524 :          CALL dbcsr_distribution_release(product_dist)
     643             : 
     644             :       END IF
     645             : 
     646      448657 :       CALL timestop(timing_handle)
     647             : 
     648      448657 :    END SUBROUTINE cp_dbcsr_sm_fm_multiply
     649             : 
     650             : ! **************************************************************************************************
     651             : !> \brief ...
     652             : !> \param sizes1 ...
     653             : !> \param sizes2 ...
     654             : !> \param full_num ...
     655             : ! **************************************************************************************************
     656      447631 :    SUBROUTINE match_col_sizes(sizes1, sizes2, full_num)
     657             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: sizes1
     658             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: sizes2
     659             :       INTEGER, INTENT(IN)                                :: full_num
     660             : 
     661             :       INTEGER                                            :: left, n1, n2, p, rm, used
     662             : 
     663      447631 :       n1 = SIZE(sizes1)
     664      447631 :       n2 = SIZE(sizes2)
     665      447631 :       IF (n1 .NE. n2) &
     666           0 :          CPABORT("distributions must be equal!")
     667      908642 :       sizes1(1:n1) = sizes2(1:n1)
     668      908642 :       used = SUM(sizes1(1:n1))
     669             :       ! If sizes1 does not cover everything, then we increase the
     670             :       ! size of the last block; otherwise we reduce the blocks
     671             :       ! (from the end) until it is small enough.
     672      447631 :       IF (used .LT. full_num) THEN
     673           0 :          sizes1(n1) = sizes1(n1) + full_num - used
     674             :       ELSE
     675      447631 :          left = used - full_num
     676      447631 :          p = n1
     677      447631 :          DO WHILE (left .GT. 0 .AND. p .GT. 0)
     678           0 :             rm = MIN(left, sizes1(p))
     679           0 :             sizes1(p) = sizes1(p) - rm
     680           0 :             left = left - rm
     681           0 :             p = p - 1
     682             :          END DO
     683             :       END IF
     684      447631 :    END SUBROUTINE match_col_sizes
     685             : 
     686             : ! **************************************************************************************************
     687             : !> \brief performs the multiplication sparse_matrix+dense_mat*dens_mat^T
     688             : !>        if matrix_g is not explicitly given, matrix_v^T will be used
     689             : !>        this can be important to save the necessary redistribute for a
     690             : !>        different matrix_g and increase performance.
     691             : !> \param sparse_matrix ...
     692             : !> \param matrix_v ...
     693             : !> \param matrix_g ...
     694             : !> \param ncol ...
     695             : !> \param alpha ...
     696             : !> \param keep_sparsity Determines if the sparsity of sparse_matrix is retained
     697             : !>        by default it is TRUE
     698             : !> \param symmetry_mode There are the following modes
     699             : !>        1:     sparse_matrix += 0.5*alpha*(v*g^T+g^T*v)    (symmetric update)
     700             : !>        -1:    sparse_matrix += 0.5*alpha*(v*g^T-g^T*v)    (skewsymmetric update)
     701             : !>        else:  sparse_matrix += alpha*v*g^T                (no symmetry, default)
     702             : !>        saves some redistribution steps
     703             : ! **************************************************************************************************
     704      189326 :    SUBROUTINE cp_dbcsr_plus_fm_fm_t(sparse_matrix, matrix_v, matrix_g, ncol, alpha, keep_sparsity, symmetry_mode)
     705             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: sparse_matrix
     706             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_v
     707             :       TYPE(cp_fm_type), OPTIONAL, INTENT(IN)             :: matrix_g
     708             :       INTEGER, INTENT(IN)                                :: ncol
     709             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: alpha
     710             :       LOGICAL, INTENT(IN), OPTIONAL                      :: keep_sparsity
     711             :       INTEGER, INTENT(IN), OPTIONAL :: symmetry_mode
     712             : 
     713             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_plus_fm_fm_t_native'
     714             : 
     715             :       INTEGER                                            :: npcols, k, nao, timing_handle, data_type, my_symmetry_mode
     716      189326 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_left, &
     717      189326 :                                                             col_dist_left, row_blk_size, row_dist
     718             :       LOGICAL                                            :: check_product, my_keep_sparsity
     719             :       REAL(KIND=dp)                                      :: my_alpha, norm
     720             :       TYPE(dbcsr_type)                                :: mat_g, mat_v, sparse_matrix2, &
     721             :                                                          sparse_matrix3
     722             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     723             :       TYPE(cp_fm_type)                          :: fm_matrix
     724             :       TYPE(dbcsr_distribution_type)                       :: dist_left, sparse_dist
     725             : 
     726      189326 :       check_product = .FALSE.
     727             : 
     728      189326 :       CALL timeset(routineN, timing_handle)
     729             : 
     730      189326 :       my_keep_sparsity = .TRUE.
     731      189326 :       IF (PRESENT(keep_sparsity)) my_keep_sparsity = keep_sparsity
     732             : 
     733      189326 :       my_symmetry_mode = 0
     734      189326 :       IF (PRESENT(symmetry_mode)) my_symmetry_mode = symmetry_mode
     735             : 
     736      189326 :       NULLIFY (col_dist_left)
     737             : 
     738      189326 :       IF (ncol .GT. 0) THEN
     739      187878 :          IF (.NOT. dbcsr_valid_index(sparse_matrix)) &
     740           0 :             CPABORT("sparse_matrix must pre-exist")
     741             :          !
     742             :          ! Setup matrix_v
     743      187878 :          CALL cp_fm_get_info(matrix_v, ncol_global=k)
     744             :          !WRITE(*,*)routineN//'truncated mult k, ncol',k,ncol,' PRESENT (matrix_g)',PRESENT (matrix_g)
     745      187878 :          CALL dbcsr_get_info(sparse_matrix, distribution=sparse_dist)
     746      187878 :          CALL dbcsr_distribution_get(sparse_dist, npcols=npcols, row_dist=row_dist)
     747      187878 :          CALL create_bl_distribution(col_dist_left, col_blk_size_left, k, npcols)
     748             :          CALL dbcsr_distribution_new(dist_left, template=sparse_dist, &
     749      187878 :                                      row_dist=row_dist, col_dist=col_dist_left)
     750      187878 :          DEALLOCATE (col_dist_left)
     751      187878 :          CALL dbcsr_get_info(sparse_matrix, row_blk_size=row_blk_size, data_type=data_type)
     752             :          CALL dbcsr_create(mat_v, "DBCSR matrix_v", dist_left, dbcsr_type_no_symmetry, &
     753      187878 :                            row_blk_size, col_blk_size_left, nze=0, data_type=data_type)
     754      187878 :          CALL copy_fm_to_dbcsr(matrix_v, mat_v)
     755      187878 :          CALL dbcsr_verify_matrix(mat_v)
     756             :          !
     757             :          ! Setup matrix_g
     758      187878 :          IF (PRESENT(matrix_g)) THEN
     759             :             CALL dbcsr_create(mat_g, "DBCSR matrix_g", dist_left, dbcsr_type_no_symmetry, &
     760       86689 :                               row_blk_size, col_blk_size_left, data_type=data_type)
     761       86689 :             CALL copy_fm_to_dbcsr(matrix_g, mat_g)
     762             :          END IF
     763             :          !
     764      187878 :          DEALLOCATE (col_blk_size_left)
     765      187878 :          CALL dbcsr_distribution_release(dist_left)
     766             :          !
     767             :          !
     768             :          IF (check_product) THEN
     769             :             CALL cp_fm_get_info(matrix_v, nrow_global=nao)
     770             :             CALL cp_fm_struct_create(fm_struct_tmp, context=matrix_v%matrix_struct%context, nrow_global=nao, &
     771             :                                      ncol_global=nao, para_env=matrix_v%matrix_struct%para_env)
     772             :             CALL cp_fm_create(fm_matrix, fm_struct_tmp, name="fm matrix")
     773             :             CALL cp_fm_struct_release(fm_struct_tmp)
     774             :             CALL copy_dbcsr_to_fm(sparse_matrix, fm_matrix)
     775             :             CALL dbcsr_copy(sparse_matrix3, sparse_matrix)
     776             :          END IF
     777             :          !
     778      187878 :          my_alpha = 1.0_dp
     779      187878 :          IF (PRESENT(alpha)) my_alpha = alpha
     780      187878 :          IF (PRESENT(matrix_g)) THEN
     781       86689 :             IF (my_symmetry_mode == 1) THEN
     782             :                ! Symmetric mode
     783             :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_v, mat_g, &
     784             :                                    1.0_dp, sparse_matrix, &
     785             :                                    retain_sparsity=my_keep_sparsity, &
     786       37260 :                                    last_k=ncol)
     787             :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_g, mat_v, &
     788             :                                    1.0_dp, sparse_matrix, &
     789             :                                    retain_sparsity=my_keep_sparsity, &
     790       37260 :                                    last_k=ncol)
     791       49429 :             ELSE IF (my_symmetry_mode == -1) THEN
     792             :                ! Skewsymmetric mode
     793             :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_v, mat_g, &
     794             :                                    1.0_dp, sparse_matrix, &
     795             :                                    retain_sparsity=my_keep_sparsity, &
     796        2198 :                                    last_k=ncol)
     797             :                CALL dbcsr_multiply("N", "T", -0.5_dp*my_alpha, mat_g, mat_v, &
     798             :                                    1.0_dp, sparse_matrix, &
     799             :                                    retain_sparsity=my_keep_sparsity, &
     800        2198 :                                    last_k=ncol)
     801             :             ELSE
     802             :                ! Normal mode
     803             :                CALL dbcsr_multiply("N", "T", my_alpha, mat_v, mat_g, &
     804             :                                    1.0_dp, sparse_matrix, &
     805             :                                    retain_sparsity=my_keep_sparsity, &
     806       47231 :                                    last_k=ncol)
     807             :             END IF
     808             :          ELSE
     809             :             CALL dbcsr_multiply("N", "T", my_alpha, mat_v, mat_v, &
     810             :                                 1.0_dp, sparse_matrix, &
     811             :                                 retain_sparsity=my_keep_sparsity, &
     812      101189 :                                 last_k=ncol)
     813             :          END IF
     814             : 
     815             :          IF (check_product) THEN
     816             :             IF (PRESENT(matrix_g)) THEN
     817             :                IF (my_symmetry_mode == 1) THEN
     818             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_v, matrix_g, &
     819             :                                   1.0_dp, fm_matrix)
     820             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_g, matrix_v, &
     821             :                                   1.0_dp, fm_matrix)
     822             :                ELSE IF (my_symmetry_mode == -1) THEN
     823             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_v, matrix_g, &
     824             :                                   1.0_dp, fm_matrix)
     825             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, -0.5_dp*my_alpha, matrix_g, matrix_v, &
     826             :                                   1.0_dp, fm_matrix)
     827             :                ELSE
     828             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, my_alpha, matrix_v, matrix_g, &
     829             :                                   1.0_dp, fm_matrix)
     830             :                END IF
     831             :             ELSE
     832             :                CALL cp_fm_gemm("N", "T", nao, nao, ncol, my_alpha, matrix_v, matrix_v, &
     833             :                                1.0_dp, fm_matrix)
     834             :             END IF
     835             : 
     836             :             CALL dbcsr_copy(sparse_matrix2, sparse_matrix)
     837             :             CALL dbcsr_scale(sparse_matrix2, alpha_scalar=0.0_dp)
     838             :             CALL copy_fm_to_dbcsr(fm_matrix, sparse_matrix2, keep_sparsity=my_keep_sparsity)
     839             :             CALL dbcsr_add(sparse_matrix2, sparse_matrix, alpha_scalar=1.0_dp, &
     840             :                            beta_scalar=-1.0_dp)
     841             :             CALL dbcsr_norm(sparse_matrix2, which_norm=dbcsr_norm_frobenius, &
     842             :                             norm_scalar=norm)
     843             :             WRITE (*, *) 'nao=', nao, ' k=', k, ' ncol=', ncol, ' my_alpha=', my_alpha
     844             :             WRITE (*, *) 'PRESENT (matrix_g)', PRESENT(matrix_g)
     845             :             WRITE (*, *) 'matrix_type=', dbcsr_get_matrix_type(sparse_matrix)
     846             :             WRITE (*, *) 'norm(sm+alpha*v*g^t - fm+alpha*v*g^t)/n=', norm/REAL(nao, dp)
     847             :             IF (norm/REAL(nao, dp) .GT. 1e-12_dp) THEN
     848             :                !WRITE(*,*) 'fm_matrix'
     849             :                !DO j=1,SIZE(fm_matrix%local_data,2)
     850             :                !   DO i=1,SIZE(fm_matrix%local_data,1)
     851             :                !      WRITE(*,'(A,I3,A,I3,A,E26.16,A)') 'a(',i,',',j,')=',fm_matrix%local_data(i,j),';'
     852             :                !   ENDDO
     853             :                !ENDDO
     854             :                !WRITE(*,*) 'mat_v'
     855             :                !CALL dbcsr_print(mat_v,matlab_format=.TRUE.)
     856             :                !WRITE(*,*) 'mat_g'
     857             :                !CALL dbcsr_print(mat_g,matlab_format=.TRUE.)
     858             :                !WRITE(*,*) 'sparse_matrix'
     859             :                !CALL dbcsr_print(sparse_matrix,matlab_format=.TRUE.)
     860             :                !WRITE(*,*) 'sparse_matrix2 (-sm + sparse(fm))'
     861             :                !CALL dbcsr_print(sparse_matrix2,matlab_format=.TRUE.)
     862             :                !WRITE(*,*) 'sparse_matrix3 (copy of sm input)'
     863             :                !CALL dbcsr_print(sparse_matrix3,matlab_format=.TRUE.)
     864             :                !stop
     865             :             END IF
     866             :             CALL dbcsr_release(sparse_matrix2)
     867             :             CALL dbcsr_release(sparse_matrix3)
     868             :             CALL cp_fm_release(fm_matrix)
     869             :          END IF
     870      187878 :          CALL dbcsr_release(mat_v)
     871      187878 :          IF (PRESENT(matrix_g)) CALL dbcsr_release(mat_g)
     872             :       END IF
     873      189326 :       CALL timestop(timing_handle)
     874             : 
     875      189326 :    END SUBROUTINE cp_dbcsr_plus_fm_fm_t
     876             : 
     877             : ! **************************************************************************************************
     878             : !> \brief Utility function to copy a specially shaped fm to dbcsr_matrix
     879             : !>        The result matrix will be the matrix in dbcsr format
     880             : !>        with the row blocks sizes according to the block_sizes of the template
     881             : !>        and the col blocks sizes evenly blocked with the internal dbcsr conversion
     882             : !>        size (32 is the current default)
     883             : !> \param matrix ...
     884             : !> \param fm_in ...
     885             : !> \param template ...
     886             : ! **************************************************************************************************
     887       19760 :    SUBROUTINE cp_fm_to_dbcsr_row_template(matrix, fm_in, template)
     888             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix
     889             :       TYPE(cp_fm_type), INTENT(IN)                    :: fm_in
     890             :       TYPE(dbcsr_type), INTENT(IN)                    :: template
     891             : 
     892             :       INTEGER                                            :: k_in, data_type
     893        4940 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_right_in, row_blk_size
     894             :       TYPE(dbcsr_distribution_type)                       :: tmpl_dist, dist_right_in
     895             : 
     896        4940 :       CALL cp_fm_get_info(fm_in, ncol_global=k_in)
     897             : 
     898        4940 :       CALL dbcsr_get_info(template, distribution=tmpl_dist)
     899        4940 :       CALL dbcsr_create_dist_r_unrot(dist_right_in, tmpl_dist, k_in, col_blk_size_right_in)
     900        4940 :       CALL dbcsr_get_info(template, row_blk_size=row_blk_size, data_type=data_type)
     901             :       CALL dbcsr_create(matrix, "D", dist_right_in, dbcsr_type_no_symmetry, &
     902        4940 :                         row_blk_size, col_blk_size_right_in, nze=0, data_type=data_type)
     903             : 
     904        4940 :       CALL copy_fm_to_dbcsr(fm_in, matrix)
     905        4940 :       DEALLOCATE (col_blk_size_right_in)
     906        4940 :       CALL dbcsr_distribution_release(dist_right_in)
     907             : 
     908        4940 :    END SUBROUTINE cp_fm_to_dbcsr_row_template
     909             : 
     910             : ! **************************************************************************************************
     911             : !> \brief Utility function to create an arbitrary shaped dbcsr matrix
     912             : !>        with the same processor grid as the template matrix
     913             : !>        both row sizes and col sizes are evenly blocked with the internal
     914             : !>        dbcsr_conversion size (32 is the current default)
     915             : !> \param matrix dbcsr matrix to be created
     916             : !> \param template template dbcsr matrix giving its mp_env
     917             : !> \param m global row size of output matrix
     918             : !> \param n global col size of output matrix
     919             : !> \param sym ...
     920             : !> \param data_type ...
     921             : ! **************************************************************************************************
     922      284322 :    SUBROUTINE cp_dbcsr_m_by_n_from_template(matrix, template, m, n, sym, data_type)
     923             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix, template
     924             :       INTEGER, INTENT(IN)                                            :: m, n
     925             :       CHARACTER, OPTIONAL, INTENT(IN)                                :: sym
     926             :       INTEGER, OPTIONAL, INTENT(IN)                    :: data_type
     927             : 
     928             :       CHARACTER                                          :: mysym
     929             :       INTEGER                                            :: my_data_type, nprows, npcols
     930      142161 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, &
     931      142161 :                                                             col_dist, row_blk_size, &
     932      142161 :                                                             row_dist
     933             :       TYPE(dbcsr_distribution_type)                       :: tmpl_dist, dist_m_n
     934             : 
     935             :       CALL dbcsr_get_info(template, &
     936             :                           matrix_type=mysym, &
     937             :                           data_type=my_data_type, &
     938      142161 :                           distribution=tmpl_dist)
     939             : 
     940      142161 :       IF (PRESENT(sym)) mysym = sym
     941      142161 :       IF (PRESENT(data_type)) my_data_type = data_type
     942             : 
     943      142161 :       NULLIFY (row_dist, col_dist)
     944      142161 :       NULLIFY (row_blk_size, col_blk_size)
     945             :       !NULLIFY (row_cluster, col_cluster)
     946             : 
     947      142161 :       CALL dbcsr_distribution_get(tmpl_dist, nprows=nprows, npcols=npcols)
     948      142161 :       CALL create_bl_distribution(row_dist, row_blk_size, m, nprows)
     949      142161 :       CALL create_bl_distribution(col_dist, col_blk_size, n, npcols)
     950             :       CALL dbcsr_distribution_new(dist_m_n, template=tmpl_dist, &
     951             :                                   row_dist=row_dist, col_dist=col_dist, &
     952             :                                   !row_cluster=row_cluster, col_cluster=col_cluster, &
     953      142161 :                                   reuse_arrays=.TRUE.)
     954             : 
     955             :       CALL dbcsr_create(matrix, "m_n_template", dist_m_n, mysym, &
     956             :                         row_blk_size, col_blk_size, nze=0, data_type=my_data_type, &
     957      142161 :                         reuse_arrays=.TRUE.)
     958      142161 :       CALL dbcsr_distribution_release(dist_m_n)
     959             : 
     960      142161 :    END SUBROUTINE cp_dbcsr_m_by_n_from_template
     961             : 
     962             : ! **************************************************************************************************
     963             : !> \brief Utility function to create dbcsr matrix, m x n matrix (n arbitrary)
     964             : !>        with the same processor grid and row distribution  as the template matrix
     965             : !>        col sizes are evenly blocked with the internal
     966             : !>        dbcsr_conversion size (32 is the current default)
     967             : !> \param matrix dbcsr matrix to be created
     968             : !> \param template template dbcsr matrix giving its mp_env
     969             : !> \param n global col size of output matrix
     970             : !> \param sym ...
     971             : !> \param data_type ...
     972             : ! **************************************************************************************************
     973      505620 :    SUBROUTINE cp_dbcsr_m_by_n_from_row_template(matrix, template, n, sym, data_type)
     974             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix, template
     975             :       INTEGER                                            :: n
     976             :       CHARACTER, OPTIONAL                                :: sym
     977             :       INTEGER, OPTIONAL                                  :: data_type
     978             : 
     979             :       CHARACTER                                          :: mysym
     980             :       INTEGER                                            :: my_data_type, npcols
     981      126405 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
     982      126405 :                                                             row_dist
     983             :       TYPE(dbcsr_distribution_type)                       :: dist_m_n, tmpl_dist
     984             : 
     985      252810 :       mysym = dbcsr_get_matrix_type(template)
     986      126405 :       IF (PRESENT(sym)) mysym = sym
     987      126405 :       my_data_type = dbcsr_get_data_type(template)
     988      126405 :       IF (PRESENT(data_type)) my_data_type = data_type
     989             : 
     990      126405 :       CALL dbcsr_get_info(template, distribution=tmpl_dist)
     991             :       CALL dbcsr_distribution_get(tmpl_dist, &
     992             :                                   npcols=npcols, &
     993      126405 :                                   row_dist=row_dist)
     994             : 
     995      126405 :       NULLIFY (col_dist, col_blk_size)
     996      126405 :       CALL create_bl_distribution(col_dist, col_blk_size, n, npcols)
     997             :       CALL dbcsr_distribution_new(dist_m_n, template=tmpl_dist, &
     998      126405 :                                   row_dist=row_dist, col_dist=col_dist)
     999             : 
    1000      126405 :       CALL dbcsr_get_info(template, row_blk_size=row_blk_size)
    1001             :       CALL dbcsr_create(matrix, "m_n_template", dist_m_n, mysym, &
    1002      126405 :                         row_blk_size, col_blk_size, nze=0, data_type=my_data_type)
    1003             : 
    1004      126405 :       DEALLOCATE (col_dist, col_blk_size)
    1005      126405 :       CALL dbcsr_distribution_release(dist_m_n)
    1006             : 
    1007      126405 :    END SUBROUTINE cp_dbcsr_m_by_n_from_row_template
    1008             : 
    1009             : ! **************************************************************************************************
    1010             : !> \brief Distributes elements into blocks and into bins
    1011             : !>
    1012             : !> \param[out] block_distribution       block distribution to bins
    1013             : !> \param[out] block_size       sizes of blocks
    1014             : !> \param[in] nelements number of elements to bin
    1015             : !> \param[in] nbins             number of bins
    1016             : !> \par Term clarification
    1017             : !>      An example: blocks are atom blocks and bins are process rows/columns.
    1018             : ! **************************************************************************************************
    1019     1051176 :    SUBROUTINE create_bl_distribution(block_distribution, &
    1020             :                                      block_size, nelements, nbins)
    1021             :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: block_distribution, block_size
    1022             :       INTEGER, INTENT(IN)                                :: nelements, nbins
    1023             : 
    1024             :       CHARACTER(len=*), PARAMETER :: routineN = 'create_bl_distribution', &
    1025             :                                      routineP = moduleN//':'//routineN
    1026             : 
    1027             :       INTEGER                                            :: bin, blk_layer, element_stack, els, &
    1028             :                                                             estimated_blocks, max_blocks_per_bin, &
    1029             :                                                             nblks, nblocks, stat
    1030     1051176 :       INTEGER, DIMENSION(:), POINTER                     :: blk_dist, blk_sizes
    1031             : 
    1032             : !   ---------------------------------------------------------------------------
    1033             : 
    1034     1051176 :       NULLIFY (block_distribution)
    1035     1051176 :       NULLIFY (block_size)
    1036             :       ! Define the sizes on which we build the distribution.
    1037     1051176 :       IF (nelements .GT. 0) THEN
    1038             : 
    1039     1042466 :          nblocks = CEILING(REAL(nelements, KIND=dp)/REAL(max_elements_per_block, KIND=dp))
    1040     1042466 :          max_blocks_per_bin = CEILING(REAL(nblocks, KIND=dp)/REAL(nbins, KIND=dp))
    1041             : 
    1042             :          IF (debug_mod) THEN
    1043             :             WRITE (*, '(1X,A,1X,A,I7,A,I7,A)') routineP, "For", nelements, &
    1044             :                " elements and", nbins, " bins"
    1045             :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1046             :                max_elements_per_block, " max elements per block"
    1047             :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1048             :                nblocks, " blocks"
    1049             :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1050             :                max_blocks_per_bin, " max blocks/bin"
    1051             :          END IF
    1052             : 
    1053     1042466 :          estimated_blocks = max_blocks_per_bin*nbins
    1054     3127398 :          ALLOCATE (blk_dist(estimated_blocks), stat=stat)
    1055     1042466 :          IF (stat /= 0) &
    1056           0 :             CPABORT("blk_dist")
    1057     2084932 :          ALLOCATE (blk_sizes(estimated_blocks), stat=stat)
    1058             :          IF (stat /= 0) &
    1059           0 :             CPABORT("blk_sizes")
    1060     1042466 :          element_stack = 0
    1061     1042466 :          nblks = 0
    1062     2145158 :          DO blk_layer = 1, max_blocks_per_bin
    1063     3371676 :             DO bin = 0, nbins - 1
    1064     1226518 :                els = MIN(max_elements_per_block, nelements - element_stack)
    1065     2329210 :                IF (els .GT. 0) THEN
    1066     1114108 :                   element_stack = element_stack + els
    1067     1114108 :                   nblks = nblks + 1
    1068     1114108 :                   blk_dist(nblks) = bin
    1069     1114108 :                   blk_sizes(nblks) = els
    1070             :                   IF (debug_mod) WRITE (*, '(1X,A,I5,A,I5,A,I5)') routineP//" Assigning", &
    1071             :                      els, " elements as block", nblks, " to bin", bin
    1072             :                END IF
    1073             :             END DO
    1074             :          END DO
    1075             :          ! Create the output arrays.
    1076     1042466 :          IF (nblks .EQ. estimated_blocks) THEN
    1077      930056 :             block_distribution => blk_dist
    1078      930056 :             block_size => blk_sizes
    1079             :          ELSE
    1080      337230 :             ALLOCATE (block_distribution(nblks), stat=stat)
    1081             :             IF (stat /= 0) &
    1082           0 :                CPABORT("blk_dist")
    1083      455160 :             block_distribution(:) = blk_dist(1:nblks)
    1084      112410 :             DEALLOCATE (blk_dist)
    1085      224820 :             ALLOCATE (block_size(nblks), stat=stat)
    1086             :             IF (stat /= 0) &
    1087           0 :                CPABORT("blk_sizes")
    1088      455160 :             block_size(:) = blk_sizes(1:nblks)
    1089      112410 :             DEALLOCATE (blk_sizes)
    1090             :          END IF
    1091             :       ELSE
    1092        8710 :          ALLOCATE (block_distribution(0), stat=stat)
    1093             :          IF (stat /= 0) &
    1094           0 :             CPABORT("blk_dist")
    1095        8710 :          ALLOCATE (block_size(0), stat=stat)
    1096             :          IF (stat /= 0) &
    1097           0 :             CPABORT("blk_sizes")
    1098             :       END IF
    1099             : 1579  FORMAT(I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5)
    1100             :       IF (debug_mod) THEN
    1101             :          WRITE (*, '(1X,A,A)') routineP//" Distribution"
    1102             :          WRITE (*, 1579) block_distribution(:)
    1103             :          WRITE (*, '(1X,A,A)') routineP//" Sizes"
    1104             :          WRITE (*, 1579) block_size(:)
    1105             :       END IF
    1106     1051176 :    END SUBROUTINE create_bl_distribution
    1107             : 
    1108             : ! **************************************************************************************************
    1109             : !> \brief Creates a new distribution for the right matrix in a matrix
    1110             : !>        multiplication with unrotated grid.
    1111             : !> \param[out] dist_right     new distribution for the right matrix
    1112             : !> \param[in] dist_left       the distribution of the left matrix
    1113             : !> \param[in] ncolumns        number of columns in right matrix
    1114             : !> \param[out] right_col_blk_sizes      sizes of blocks in the created column
    1115             : !> \par The new row distribution for the right matrix is the same as the row
    1116             : !>      distribution of the left matrix, while the column distribution is
    1117             : !>      created so that it is appropriate to the parallel environment.
    1118             : ! **************************************************************************************************
    1119      452571 :    SUBROUTINE dbcsr_create_dist_r_unrot(dist_right, dist_left, ncolumns, &
    1120             :                                         right_col_blk_sizes)
    1121             :       TYPE(dbcsr_distribution_type), INTENT(OUT)          :: dist_right
    1122             :       TYPE(dbcsr_distribution_type), INTENT(IN)           :: dist_left
    1123             :       INTEGER, INTENT(IN)                                :: ncolumns
    1124             :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: right_col_blk_sizes
    1125             : 
    1126             :       INTEGER                                            :: multiplicity, nimages, ncols, nprows, npcols
    1127             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: tmp_images
    1128      452571 :       INTEGER, DIMENSION(:), POINTER                     :: right_col_dist, right_row_dist, old_col_dist, dummy
    1129             : 
    1130             :       CALL dbcsr_distribution_get(dist_left, &
    1131             :                                   ncols=ncols, &
    1132             :                                   col_dist=old_col_dist, &
    1133             :                                   nprows=nprows, &
    1134      452571 :                                   npcols=npcols)
    1135             : 
    1136             :       ! Create the column distribution
    1137      452571 :       CALL create_bl_distribution(right_col_dist, right_col_blk_sizes, ncolumns, npcols)
    1138             :       ! Create an even row distribution.
    1139     1810284 :       ALLOCATE (right_row_dist(ncols), tmp_images(ncols))
    1140      452571 :       nimages = lcm(nprows, npcols)/nprows
    1141      452571 :       multiplicity = nprows/gcd(nprows, npcols)
    1142      452571 :       CALL rebin_distribution(right_row_dist, tmp_images, old_col_dist, nprows, multiplicity, nimages)
    1143             : 
    1144      452571 :       NULLIFY (dummy)
    1145             :       CALL dbcsr_distribution_new(dist_right, &
    1146             :                                   template=dist_left, &
    1147             :                                   row_dist=right_row_dist, &
    1148             :                                   col_dist=right_col_dist, &
    1149             :                                   !row_cluster=dummy,&
    1150             :                                   !col_cluster=dummy,&
    1151      452571 :                                   reuse_arrays=.TRUE.)
    1152      452571 :       DEALLOCATE (tmp_images)
    1153      452571 :    END SUBROUTINE dbcsr_create_dist_r_unrot
    1154             : 
    1155             : ! **************************************************************************************************
    1156             : !> \brief Makes new distribution with decimation and multiplicity
    1157             : !> \param[out] new_bins      new real distribution
    1158             : !> \param[out] images        new image distribution
    1159             : !> \param[in] source_bins    Basis for the new distribution and images
    1160             : !> \param[in] nbins          number of bins in the new real distribution
    1161             : !> \param[in] multiplicity   multiplicity
    1162             : !> \param[in] nimages        number of images in the new distribution
    1163             : !> \par Definition of multiplicity and nimages
    1164             : !>      Multiplicity and decimation (number of images) are used to
    1165             : !>      match process grid coordinates on non-square process
    1166             : !>      grids. Given source_nbins and target_nbins, their relation is
    1167             : !>                source_nbins * target_multiplicity
    1168             : !>              = target_nbins * target_nimages.
    1169             : !>      It is best when both multiplicity and nimages are small. To
    1170             : !>      get these two factors, then, one can use the following formulas:
    1171             : !>          nimages      = lcm(source_nbins, target_nbins) / target_nbins
    1172             : !>          multiplicity = target_nbins / gcd(source_nbins, target_nbins)
    1173             : !>      from the target's point of view (nimages = target_nimages).
    1174             : !> \par Mapping
    1175             : !>      The new distribution comprises of real bins and images within
    1176             : !>      bins. These can be view as target_nbins*nimages virtual
    1177             : !>      columns. These same virtual columns are also
    1178             : !>      source_nbins*multiplicity in number. Therefore these virtual
    1179             : !>      columns are mapped from source_nbins*multiplicity onto
    1180             : !>      target_bins*nimages (each target bin has nimages images):
    1181             : !>      Source 4: |1 2 3|4 5 6|7 8 9|A B C| (4*3)
    1182             : !>      Target 6: |1 2|3 4|5 6|7 8|9 A|B C| (6*2)
    1183             : !>      multiplicity=3, nimages=2, 12 virtual columns (1-C).
    1184             : !>      Source bin elements are evenly mapped into one of multiplicity
    1185             : !>      virtual columns. Other (non-even, block-size aware) mappings
    1186             : !>      could be better.
    1187             : ! **************************************************************************************************
    1188      452571 :    SUBROUTINE rebin_distribution(new_bins, images, source_bins, &
    1189             :                                  nbins, multiplicity, nimages)
    1190             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: new_bins, images
    1191             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: source_bins
    1192             :       INTEGER, INTENT(IN)                                :: nbins, multiplicity, nimages
    1193             : 
    1194             :       INTEGER                                            :: bin, i, old_nbins, virtual_bin
    1195      452571 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: bin_multiplier
    1196             : 
    1197             : !   ---------------------------------------------------------------------------
    1198             : 
    1199      452571 :       IF (MOD(nbins*nimages, multiplicity) .NE. 0) &
    1200           0 :          CPWARN("mulitplicity is not divisor of new process grid coordinate")
    1201      452571 :       old_nbins = (nbins*nimages)/multiplicity
    1202     1357713 :       ALLOCATE (bin_multiplier(0:old_nbins - 1))
    1203      905142 :       bin_multiplier(:) = 0
    1204     2148755 :       DO i = 1, SIZE(new_bins)
    1205     1696184 :          IF (i .LE. SIZE(source_bins)) THEN
    1206     1696184 :             bin = source_bins(i)
    1207             :          ELSE
    1208             :             ! Fill remainder with a cyclic distribution
    1209           0 :             bin = MOD(i, old_nbins)
    1210             :          END IF
    1211     1696184 :          virtual_bin = bin*multiplicity + bin_multiplier(bin)
    1212     1696184 :          new_bins(i) = virtual_bin/nimages
    1213     1696184 :          images(i) = 1 + MOD(virtual_bin, nimages)
    1214     1696184 :          bin_multiplier(bin) = bin_multiplier(bin) + 1
    1215     2148755 :          IF (bin_multiplier(bin) .GE. multiplicity) THEN
    1216      724140 :             bin_multiplier(bin) = 0
    1217             :          END IF
    1218             :       END DO
    1219      452571 :    END SUBROUTINE rebin_distribution
    1220             : 
    1221             : ! **************************************************************************************************
    1222             : !> \brief Creates a block-cyclic compatible distribution
    1223             : !>
    1224             : !>        All blocks in a dimension, except for possibly the last
    1225             : !>        block, have the same size.
    1226             : !> \param[out] dist           the elemental distribution
    1227             : !> \param[in] nrows           number of full rows
    1228             : !> \param[in] ncolumns        number of full columns
    1229             : !> \param[in] nrow_block      size of row blocks
    1230             : !> \param[in] ncol_block      size of column blocks
    1231             : !> \param[in] mp_env          multiprocess environment
    1232             : !> \param[out] row_blk_sizes  row block sizes
    1233             : !> \param[out] col_blk_sizes  column block sizes
    1234             : ! **************************************************************************************************
    1235     2027349 :    SUBROUTINE dbcsr_create_dist_block_cyclic(dist, nrows, ncolumns, &
    1236             :                                              nrow_block, ncol_block, group_handle, pgrid, row_blk_sizes, col_blk_sizes)
    1237             :       TYPE(dbcsr_distribution_type), INTENT(OUT)          :: dist
    1238             :       INTEGER, INTENT(IN)                                :: nrows, ncolumns, nrow_block, ncol_block
    1239             :       INTEGER, INTENT(IN)                                :: group_handle
    1240             :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1241             :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: row_blk_sizes, col_blk_sizes
    1242             : 
    1243             :       CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_dist_block_cyclic'
    1244             : 
    1245             :       INTEGER                                            :: nblkcols, nblkrows, npcols, nprows, &
    1246             :                                                             pdim, sz
    1247     2027349 :       INTEGER, DIMENSION(:), POINTER                     :: cd_data, rd_data
    1248             : 
    1249             :       ! Row sizes
    1250     2027349 :       IF (nrow_block .EQ. 0) THEN
    1251             :          nblkrows = 0
    1252             :          sz = 0
    1253             :       ELSE
    1254     2027349 :          nblkrows = nrows/nrow_block
    1255     2027349 :          sz = MOD(nrows, nrow_block)
    1256             :       END IF
    1257     2027349 :       IF (sz .GT. 0) nblkrows = nblkrows + 1
    1258    10135209 :       ALLOCATE (row_blk_sizes(nblkrows), rd_data(nblkrows))
    1259     6051824 :       row_blk_sizes = nrow_block
    1260     2027349 :       IF (sz .NE. 0) row_blk_sizes(nblkrows) = sz
    1261             : 
    1262             :       ! Column sizes
    1263     2027349 :       IF (ncol_block .EQ. 0) THEN
    1264             :          nblkcols = 0
    1265             :          sz = 0
    1266             :       ELSE
    1267     2027349 :          nblkcols = ncolumns/ncol_block
    1268     2027349 :          sz = MOD(ncolumns, ncol_block)
    1269             :       END IF
    1270     2027349 :       IF (sz .GT. 0) nblkcols = nblkcols + 1
    1271    10131399 :       ALLOCATE (col_blk_sizes(nblkcols), cd_data(nblkcols))
    1272     4703472 :       col_blk_sizes = ncol_block
    1273     2027349 :       IF (sz .NE. 0) col_blk_sizes(nblkcols) = sz
    1274             :       !
    1275             :       IF (debug_mod) THEN
    1276             :          WRITE (*, *) routineN//" nrows,nrow_block,nblkrows=", &
    1277             :             nrows, nrow_block, nblkrows
    1278             :          WRITE (*, *) routineN//" ncols,ncol_block,nblkcols=", &
    1279             :             ncolumns, ncol_block, nblkcols
    1280             :       END IF
    1281             :       ! Calculate process row distribution
    1282     2027349 :       nprows = SIZE(pgrid, 1)
    1283     6013678 :       DO pdim = 0, MIN(nprows - 1, nblkrows - 1)
    1284    10038153 :          rd_data(1 + pdim:nblkrows:nprows) = pdim
    1285             :       END DO
    1286             :       ! Calculate process column distribution
    1287     2027349 :       npcols = SIZE(pgrid, 2)
    1288     4052916 :       DO pdim = 0, MIN(npcols - 1, nblkcols - 1)
    1289     6729039 :          cd_data(1 + pdim:nblkcols:npcols) = pdim
    1290             :       END DO
    1291             :       !
    1292             :       IF (debug_mod) THEN
    1293             :          WRITE (*, *) routineN//" row_dist", &
    1294             :             rd_data
    1295             :          WRITE (*, *) routineN//" col_dist", &
    1296             :             cd_data
    1297             :       END IF
    1298             :       !
    1299             :       CALL dbcsr_distribution_new(dist, &
    1300             :                                   group=group_handle, pgrid=pgrid, &
    1301             :                                   row_dist=rd_data, &
    1302             :                                   col_dist=cd_data, &
    1303     2027349 :                                   reuse_arrays=.TRUE.)
    1304             : 
    1305     2027349 :    END SUBROUTINE dbcsr_create_dist_block_cyclic
    1306             : 
    1307             : ! **************************************************************************************************
    1308             : !> \brief   Allocate and initialize a real matrix 1-dimensional set.
    1309             : !> \param[in,out] matrix_set  Set containing the DBCSR matrices
    1310             : !> \param[in] nmatrix         Size of set
    1311             : !> \par History
    1312             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1313             : ! **************************************************************************************************
    1314      179867 :    SUBROUTINE allocate_dbcsr_matrix_set_1d(matrix_set, nmatrix)
    1315             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_set
    1316             :       INTEGER, INTENT(IN)                                :: nmatrix
    1317             : 
    1318             :       INTEGER                                            :: imatrix
    1319             : 
    1320      179867 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1321      841950 :       ALLOCATE (matrix_set(nmatrix))
    1322      482216 :       DO imatrix = 1, nmatrix
    1323      482216 :          NULLIFY (matrix_set(imatrix)%matrix)
    1324             :       END DO
    1325      179867 :    END SUBROUTINE allocate_dbcsr_matrix_set_1d
    1326             : 
    1327             : ! **************************************************************************************************
    1328             : !> \brief   Allocate and initialize a real matrix 2-dimensional set.
    1329             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1330             : !> \param[in] nmatrix         Size of set
    1331             : !> \param mmatrix ...
    1332             : !> \par History
    1333             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1334             : ! **************************************************************************************************
    1335      135020 :    SUBROUTINE allocate_dbcsr_matrix_set_2d(matrix_set, nmatrix, mmatrix)
    1336             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: matrix_set
    1337             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix
    1338             : 
    1339             :       INTEGER                                            :: imatrix, jmatrix
    1340             : 
    1341      135020 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1342     1832892 :       ALLOCATE (matrix_set(nmatrix, mmatrix))
    1343      727854 :       DO jmatrix = 1, mmatrix
    1344     1427832 :          DO imatrix = 1, nmatrix
    1345     1292812 :             NULLIFY (matrix_set(imatrix, jmatrix)%matrix)
    1346             :          END DO
    1347             :       END DO
    1348      135020 :    END SUBROUTINE allocate_dbcsr_matrix_set_2d
    1349             : 
    1350             : ! **************************************************************************************************
    1351             : !> \brief   Allocate and initialize a real matrix 3-dimensional set.
    1352             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1353             : !> \param[in] nmatrix         Size of set
    1354             : !> \param mmatrix ...
    1355             : !> \param pmatrix ...
    1356             : !> \par History
    1357             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1358             : ! **************************************************************************************************
    1359           0 :    SUBROUTINE allocate_dbcsr_matrix_set_3d(matrix_set, nmatrix, mmatrix, pmatrix)
    1360             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :), POINTER    :: matrix_set
    1361             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix
    1362             : 
    1363             :       INTEGER                                            :: imatrix, jmatrix, kmatrix
    1364             : 
    1365           0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1366           0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix))
    1367           0 :       DO kmatrix = 1, pmatrix
    1368           0 :          DO jmatrix = 1, mmatrix
    1369           0 :             DO imatrix = 1, nmatrix
    1370           0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix)%matrix)
    1371             :             END DO
    1372             :          END DO
    1373             :       END DO
    1374           0 :    END SUBROUTINE allocate_dbcsr_matrix_set_3d
    1375             : 
    1376             : ! **************************************************************************************************
    1377             : !> \brief   Allocate and initialize a real matrix 4-dimensional set.
    1378             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1379             : !> \param[in] nmatrix         Size of set
    1380             : !> \param mmatrix ...
    1381             : !> \param pmatrix ...
    1382             : !> \par History
    1383             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1384             : ! **************************************************************************************************
    1385           0 :    SUBROUTINE allocate_dbcsr_matrix_set_4d(matrix_set, nmatrix, mmatrix, pmatrix, qmatrix)
    1386             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :), POINTER    :: matrix_set
    1387             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix, qmatrix
    1388             : 
    1389             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix
    1390             : 
    1391           0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1392           0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix, qmatrix))
    1393           0 :       DO lmatrix = 1, qmatrix
    1394           0 :       DO kmatrix = 1, pmatrix
    1395           0 :          DO jmatrix = 1, mmatrix
    1396           0 :             DO imatrix = 1, nmatrix
    1397           0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix, lmatrix)%matrix)
    1398             :             END DO
    1399             :          END DO
    1400             :       END DO
    1401             :       END DO
    1402           0 :    END SUBROUTINE allocate_dbcsr_matrix_set_4d
    1403             : 
    1404             : ! **************************************************************************************************
    1405             : !> \brief   Allocate and initialize a real matrix 5-dimensional set.
    1406             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1407             : !> \param[in] nmatrix         Size of set
    1408             : !> \param mmatrix ...
    1409             : !> \param pmatrix ...
    1410             : !> \par History
    1411             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1412             : ! **************************************************************************************************
    1413           0 :    SUBROUTINE allocate_dbcsr_matrix_set_5d(matrix_set, nmatrix, mmatrix, pmatrix, qmatrix, smatrix)
    1414             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :, :), POINTER    :: matrix_set
    1415             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix, qmatrix, smatrix
    1416             : 
    1417             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix, hmatrix
    1418             : 
    1419           0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1420           0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix, qmatrix, smatrix))
    1421           0 :       DO hmatrix = 1, smatrix
    1422           0 :       DO lmatrix = 1, qmatrix
    1423           0 :       DO kmatrix = 1, pmatrix
    1424           0 :          DO jmatrix = 1, mmatrix
    1425           0 :             DO imatrix = 1, nmatrix
    1426           0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix, lmatrix, hmatrix)%matrix)
    1427             :             END DO
    1428             :          END DO
    1429             :       END DO
    1430             :       END DO
    1431             :       END DO
    1432           0 :    END SUBROUTINE allocate_dbcsr_matrix_set_5d
    1433             : 
    1434             :    ! **************************************************************************************************
    1435             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1436             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1437             : !> \par History
    1438             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1439             : ! **************************************************************************************************
    1440      178658 :    SUBROUTINE deallocate_dbcsr_matrix_set_1d(matrix_set)
    1441             : 
    1442             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_set
    1443             : 
    1444             :       INTEGER                                            :: imatrix
    1445             : 
    1446      178658 :       IF (ASSOCIATED(matrix_set)) THEN
    1447      479624 :          DO imatrix = 1, SIZE(matrix_set)
    1448      479624 :             CALL dbcsr_deallocate_matrix(matrix_set(imatrix)%matrix)
    1449             :          END DO
    1450      177176 :          DEALLOCATE (matrix_set)
    1451             :       END IF
    1452             : 
    1453      178658 :    END SUBROUTINE deallocate_dbcsr_matrix_set_1d
    1454             : 
    1455             : ! **************************************************************************************************
    1456             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1457             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1458             : !> \par History
    1459             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1460             : ! **************************************************************************************************
    1461      139337 :    SUBROUTINE deallocate_dbcsr_matrix_set_2d(matrix_set)
    1462             : 
    1463             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: matrix_set
    1464             : 
    1465             :       INTEGER                                            :: imatrix, jmatrix
    1466             : 
    1467      139337 :       IF (ASSOCIATED(matrix_set)) THEN
    1468      733524 :          DO jmatrix = 1, SIZE(matrix_set, 2)
    1469     1438619 :             DO imatrix = 1, SIZE(matrix_set, 1)
    1470     1302284 :                CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix)%matrix)
    1471             :             END DO
    1472             :          END DO
    1473      136335 :          DEALLOCATE (matrix_set)
    1474             :       END IF
    1475      139337 :    END SUBROUTINE deallocate_dbcsr_matrix_set_2d
    1476             : 
    1477             : ! **************************************************************************************************
    1478             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1479             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1480             : !> \par History
    1481             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1482             : ! **************************************************************************************************
    1483           0 :    SUBROUTINE deallocate_dbcsr_matrix_set_3d(matrix_set)
    1484             : 
    1485             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :), POINTER    :: matrix_set
    1486             : 
    1487             :       INTEGER                                            :: imatrix, jmatrix, kmatrix
    1488             : 
    1489           0 :       IF (ASSOCIATED(matrix_set)) THEN
    1490           0 :          DO kmatrix = 1, SIZE(matrix_set, 3)
    1491           0 :             DO jmatrix = 1, SIZE(matrix_set, 2)
    1492           0 :                DO imatrix = 1, SIZE(matrix_set, 1)
    1493           0 :                   CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix)%matrix)
    1494             :                END DO
    1495             :             END DO
    1496             :          END DO
    1497           0 :          DEALLOCATE (matrix_set)
    1498             :       END IF
    1499           0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_3d
    1500             : 
    1501             : ! **************************************************************************************************
    1502             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1503             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1504             : !> \par History
    1505             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1506             : ! **************************************************************************************************
    1507           0 :    SUBROUTINE deallocate_dbcsr_matrix_set_4d(matrix_set)
    1508             : 
    1509             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :), POINTER    :: matrix_set
    1510             : 
    1511             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix
    1512             : 
    1513           0 :       IF (ASSOCIATED(matrix_set)) THEN
    1514           0 :          DO lmatrix = 1, SIZE(matrix_set, 4)
    1515           0 :          DO kmatrix = 1, SIZE(matrix_set, 3)
    1516           0 :             DO jmatrix = 1, SIZE(matrix_set, 2)
    1517           0 :                DO imatrix = 1, SIZE(matrix_set, 1)
    1518           0 :                   CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix, lmatrix)%matrix)
    1519             :                END DO
    1520             :             END DO
    1521             :          END DO
    1522             :          END DO
    1523           0 :          DEALLOCATE (matrix_set)
    1524             :       END IF
    1525           0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_4d
    1526             : 
    1527             : ! **************************************************************************************************
    1528             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1529             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1530             : !> \par History
    1531             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1532             : ! **************************************************************************************************
    1533           0 :    SUBROUTINE deallocate_dbcsr_matrix_set_5d(matrix_set)
    1534             : 
    1535             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :, :), POINTER    :: matrix_set
    1536             : 
    1537             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, hmatrix, lmatrix
    1538             : 
    1539           0 :       IF (ASSOCIATED(matrix_set)) THEN
    1540           0 :          DO hmatrix = 1, SIZE(matrix_set, 5)
    1541           0 :             DO lmatrix = 1, SIZE(matrix_set, 4)
    1542           0 :             DO kmatrix = 1, SIZE(matrix_set, 3)
    1543           0 :                DO jmatrix = 1, SIZE(matrix_set, 2)
    1544           0 :                   DO imatrix = 1, SIZE(matrix_set, 1)
    1545           0 :                      CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix, lmatrix, hmatrix)%matrix)
    1546             :                   END DO
    1547             :                END DO
    1548             :             END DO
    1549             :             END DO
    1550             :          END DO
    1551           0 :          DEALLOCATE (matrix_set)
    1552             :       END IF
    1553           0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_5d
    1554             : 
    1555             : END MODULE cp_dbcsr_operations

Generated by: LCOV version 1.15