LCOV - code coverage report
Current view: top level - src - cp_dbcsr_operations.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:2fce0f8) Lines: 364 456 79.8 %
Date: 2024-12-21 06:28:57 Functions: 25 32 78.1 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief   DBCSR operations in CP2K
      10             : !> \author  Urban Borstnik
      11             : !> \date    2009-05-12
      12             : !> \version 0.8
      13             : !>
      14             : !> <b>Modification history:</b>
      15             : !> - Created 2009-05-12
      16             : !> - Generalized sm_fm_mulitply for matrices w/ different row/col block size (A. Bussy, 11.2018)
      17             : ! **************************************************************************************************
      18             : MODULE cp_dbcsr_operations
      19             :    USE mathlib, ONLY: lcm, gcd
      20             :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21             :    USE cp_cfm_types, ONLY: cp_cfm_type
      22             :    USE cp_dbcsr_api, ONLY: dbcsr_distribution_get, &
      23             :                            dbcsr_convert_sizes_to_offsets, dbcsr_add, &
      24             :                            dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, &
      25             :                            dbcsr_deallocate_matrix, &
      26             :                            dbcsr_desymmetrize, dbcsr_distribution_new, &
      27             :                            dbcsr_get_data_type, dbcsr_get_info, dbcsr_get_matrix_type, &
      28             :                            dbcsr_iterator_type, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      29             :                            dbcsr_iterator_start, dbcsr_iterator_stop, &
      30             :                            dbcsr_multiply, dbcsr_norm, dbcsr_p_type, dbcsr_release, &
      31             :                            dbcsr_reserve_all_blocks, dbcsr_scale, dbcsr_type, &
      32             :                            dbcsr_valid_index, dbcsr_verify_matrix, &
      33             :                            dbcsr_distribution_type, dbcsr_distribution_release, &
      34             :                            dbcsr_norm_frobenius, &
      35             :                            dbcsr_type_antisymmetric, dbcsr_type_complex_8, dbcsr_type_no_symmetry, dbcsr_type_real_8, &
      36             :                            dbcsr_type_symmetric
      37             :    USE cp_fm_basic_linalg, ONLY: cp_fm_gemm
      38             :    USE cp_fm_struct, ONLY: cp_fm_struct_create, &
      39             :                            cp_fm_struct_release, &
      40             :                            cp_fm_struct_type
      41             :    USE cp_fm_types, ONLY: cp_fm_create, &
      42             :                           cp_fm_get_info, &
      43             :                           cp_fm_release, &
      44             :                           cp_fm_to_fm, &
      45             :                           cp_fm_type
      46             :    USE message_passing, ONLY: mp_para_env_type
      47             :    USE distribution_2d_types, ONLY: distribution_2d_get, &
      48             :                                     distribution_2d_type
      49             :    USE kinds, ONLY: dp, default_string_length
      50             :    USE message_passing, ONLY: mp_comm_type
      51             : 
      52             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      53             : #include "base/base_uses.f90"
      54             : 
      55             :    IMPLICIT NONE
      56             : 
      57             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_dbcsr_operations'
      58             :    LOGICAL, PARAMETER :: debug_mod = .FALSE.
      59             : 
      60             :    INTEGER, SAVE, PUBLIC :: max_elements_per_block = 32
      61             : 
      62             :    PUBLIC :: dbcsr_multiply_local
      63             : 
      64             :    ! CP2K API emulation
      65             :    PUBLIC :: copy_fm_to_dbcsr, copy_dbcsr_to_fm, &
      66             :              copy_dbcsr_to_cfm, copy_cfm_to_dbcsr, &
      67             :              cp_dbcsr_sm_fm_multiply, cp_dbcsr_plus_fm_fm_t, &
      68             :              copy_dbcsr_to_fm_bc, copy_fm_to_dbcsr_bc, cp_fm_to_dbcsr_row_template, &
      69             :              cp_dbcsr_m_by_n_from_template, cp_dbcsr_m_by_n_from_row_template, &
      70             :              dbcsr_create_dist_r_unrot
      71             : 
      72             :    ! distribution_2d_type compatibility
      73             :    PUBLIC :: cp_dbcsr_dist2d_to_dist
      74             : 
      75             :    PUBLIC :: dbcsr_copy_columns_hack
      76             : 
      77             :    ! matrix set
      78             :    PUBLIC :: dbcsr_allocate_matrix_set
      79             :    PUBLIC :: dbcsr_deallocate_matrix_set
      80             : 
      81             :    INTERFACE dbcsr_allocate_matrix_set
      82             :       #:for ii in range(1, 6)
      83             :          MODULE PROCEDURE allocate_dbcsr_matrix_set_${ii}$d
      84             :       #:endfor
      85             :    END INTERFACE
      86             : 
      87             :    INTERFACE dbcsr_deallocate_matrix_set
      88             :       #:for ii in range(1, 6)
      89             :          MODULE PROCEDURE deallocate_dbcsr_matrix_set_${ii}$d
      90             :       #:endfor
      91             :    END INTERFACE
      92             : 
      93             :    PRIVATE
      94             : 
      95             : CONTAINS
      96             : 
      97             :    #:for fm, type, constr in [("fm", "REAL", "REAL"), ("cfm", "COMPLEX", "CMPLX")]
      98             : 
      99             : ! **************************************************************************************************
     100             : !> \brief   Copy a BLACS matrix to a dbcsr matrix.
     101             : !>
     102             : !>          real_matrix=beta*real_matrix+alpha*fm
     103             : !>          beta defaults to 0, alpha to 1
     104             : !> \param[in] fm              full matrix
     105             : !> \param[out] matrix         DBCSR matrix
     106             : !> \param[in] keep_sparsity   (optional) retains the sparsity of the input
     107             : !>                            matrix
     108             : !> \date    2009-10-13
     109             : !> \par History
     110             : !>          2009-10-13 rewritten based on copy_dbcsr_to_fm
     111             : !> \author  Urban Borstnik
     112             : !> \version 2.0
     113             : ! **************************************************************************************************
     114     1056359 :       SUBROUTINE copy_${fm}$_to_dbcsr(fm, matrix, keep_sparsity)
     115             :          TYPE(cp_${fm}$_type), INTENT(IN)                   :: fm
     116             :          TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     117             :          LOGICAL, INTENT(IN), OPTIONAL                      :: keep_sparsity
     118             : 
     119             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_${fm}$_to_dbcsr'
     120             : 
     121             :          TYPE(dbcsr_type)                                   :: bc_mat, redist_mat
     122             :          INTEGER                                            :: handle
     123             :          LOGICAL                                            :: my_keep_sparsity
     124             : 
     125     1056359 :          CALL timeset(routineN, handle)
     126             : 
     127     1056359 :          my_keep_sparsity = .FALSE.
     128     1056359 :          IF (PRESENT(keep_sparsity)) my_keep_sparsity = keep_sparsity
     129             : 
     130     1056359 :          CALL copy_${fm}$_to_dbcsr_bc(fm, bc_mat)
     131             : 
     132     1056359 :          IF (my_keep_sparsity) THEN
     133       81366 :             CALL dbcsr_create(redist_mat, template=matrix)
     134       81366 :             CALL dbcsr_complete_redistribute(bc_mat, redist_mat)
     135       81366 :             CALL dbcsr_copy(matrix, redist_mat, keep_sparsity=.TRUE.)
     136       81366 :             CALL dbcsr_release(redist_mat)
     137             :          ELSE
     138      974993 :             CALL dbcsr_complete_redistribute(bc_mat, matrix)
     139             :          END IF
     140             : 
     141     1056359 :          CALL dbcsr_release(bc_mat)
     142             : 
     143     1056359 :          CALL timestop(handle)
     144     1056359 :       END SUBROUTINE copy_${fm}$_to_dbcsr
     145             : 
     146             : ! **************************************************************************************************
     147             : !> \brief   Copy a BLACS matrix to a dbcsr matrix with a special block-cyclic distribution,
     148             : !>           which requires no complete redistribution.
     149             : !> \param fm ...
     150             : !> \param bc_mat ...
     151             : ! **************************************************************************************************
     152     1056729 :       SUBROUTINE copy_${fm}$_to_dbcsr_bc(fm, bc_mat)
     153             :          TYPE(cp_${fm}$_type), INTENT(IN)                    :: fm
     154             :          TYPE(dbcsr_type), INTENT(INOUT)                     :: bc_mat
     155             : 
     156             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_${fm}$_to_dbcsr_bc'
     157             : 
     158             :          INTEGER :: col, handle, ncol_block, ncol_global, nrow_block, nrow_global, row
     159     1056729 :          INTEGER, ALLOCATABLE, DIMENSION(:)                  :: first_col, first_row, last_col, last_row
     160     1056729 :          INTEGER, DIMENSION(:), POINTER                      :: col_blk_size, row_blk_size
     161     1056729 :          ${type}$ (KIND=dp), DIMENSION(:, :), POINTER        :: fm_block, dbcsr_block
     162             :          TYPE(dbcsr_distribution_type)                       :: bc_dist
     163             :          TYPE(dbcsr_iterator_type)                           :: iter
     164     1056729 :          INTEGER, DIMENSION(:, :), POINTER                   :: pgrid
     165             : 
     166     1056729 :          CALL timeset(routineN, handle)
     167             : 
     168             :          #:if (type=="REAL")
     169     1054191 :             IF (fm%use_sp) CPABORT("copy_${fm}$_to_dbcsr_bc: single precision not supported")
     170             :          #:endif
     171             : 
     172             :          ! Create processor grid
     173     1056729 :          pgrid => fm%matrix_struct%context%blacs2mpi
     174             : 
     175             :          ! Create a block-cyclic distribution compatible with the FM matrix.
     176     1056729 :          nrow_block = fm%matrix_struct%nrow_block
     177     1056729 :          ncol_block = fm%matrix_struct%ncol_block
     178     1056729 :          nrow_global = fm%matrix_struct%nrow_global
     179     1056729 :          ncol_global = fm%matrix_struct%ncol_global
     180     1056729 :          NULLIFY (col_blk_size, row_blk_size)
     181             :          CALL dbcsr_create_dist_block_cyclic(bc_dist, &
     182             :                                              nrows=nrow_global, ncolumns=ncol_global, & ! Actual full matrix size
     183             :                                              nrow_block=nrow_block, ncol_block=ncol_block, & ! BLACS parameters
     184             :                                              group_handle=fm%matrix_struct%para_env%get_handle(), pgrid=pgrid, &
     185     1056729 :                                              row_blk_sizes=row_blk_size, col_blk_sizes=col_blk_size) ! block-cyclic row/col sizes
     186             : 
     187             :          ! Create the block-cyclic DBCSR matrix
     188             :          CALL dbcsr_create(bc_mat, "Block-cyclic ", bc_dist, &
     189             :                            dbcsr_type_no_symmetry, row_blk_size, col_blk_size, nze=0, &
     190     1056729 :                            reuse_arrays=.TRUE., data_type=dbcsr_type_${type.lower()}$_8)
     191     1056729 :          CALL dbcsr_distribution_release(bc_dist)
     192             : 
     193             :          ! allocate all blocks
     194     1056729 :          CALL dbcsr_reserve_all_blocks(bc_mat)
     195             : 
     196     1056729 :          CALL calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     197             : 
     198             :          ! Copy the FM data to the block-cyclic DBCSR matrix.  This step
     199             :          ! could be skipped with appropriate DBCSR index manipulation.
     200     1056729 :          fm_block => fm%local_data
     201             : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(iter, row, col, dbcsr_block) &
     202     1056729 : !$OMP SHARED(bc_mat, last_row, first_row, last_col, first_col, fm_block)
     203             :          CALL dbcsr_iterator_start(iter, bc_mat)
     204             :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     205             :             CALL dbcsr_iterator_next_block(iter, row, col, dbcsr_block)
     206             :             dbcsr_block(:, :) = fm_block(first_row(row):last_row(row), first_col(col):last_col(col))
     207             :          END DO
     208             :          CALL dbcsr_iterator_stop(iter)
     209             : !$OMP END PARALLEL
     210             : 
     211     1056729 :          CALL timestop(handle)
     212     2113458 :       END SUBROUTINE copy_${fm}$_to_dbcsr_bc
     213             : 
     214             : ! **************************************************************************************************
     215             : !> \brief Copy a DBCSR matrix to a BLACS matrix
     216             : !> \param[in] matrix          DBCSR matrix
     217             : !> \param[out] fm             full matrix
     218             : ! **************************************************************************************************
     219     3982428 :       SUBROUTINE copy_dbcsr_to_${fm}$ (matrix, fm)
     220             :          TYPE(dbcsr_type), INTENT(IN)                       :: matrix
     221             :          TYPE(cp_${fm}$_type), INTENT(IN)                   :: fm
     222             : 
     223             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_dbcsr_to_${fm}$'
     224             : 
     225      995607 :          INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
     226             :          INTEGER :: handle, ncol_block, nfullcols_total, group_handle, &
     227             :                     nfullrows_total, nrow_block
     228             :          TYPE(dbcsr_type)                                   :: bc_mat, matrix_nosym
     229             :          TYPE(dbcsr_distribution_type)                      :: dist, bc_dist
     230             :          CHARACTER(len=default_string_length)               :: name
     231      995607 :          INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     232             : 
     233      995607 :          CALL timeset(routineN, handle)
     234             : 
     235             :          ! check compatibility
     236             :          CALL dbcsr_get_info(matrix, &
     237             :                              name=name, &
     238             :                              distribution=dist, &
     239             :                              nfullrows_total=nfullrows_total, &
     240      995607 :                              nfullcols_total=nfullcols_total)
     241             : 
     242      995607 :          CPASSERT(fm%matrix_struct%nrow_global == nfullrows_total)
     243      995607 :          CPASSERT(fm%matrix_struct%ncol_global == nfullcols_total)
     244             : 
     245             :          ! info about the full matrix
     246      995607 :          nrow_block = fm%matrix_struct%nrow_block
     247      995607 :          ncol_block = fm%matrix_struct%ncol_block
     248             : 
     249             :          ! Convert DBCSR to a block-cyclic
     250      995607 :          NULLIFY (col_blk_size, row_blk_size)
     251      995607 :          CALL dbcsr_distribution_get(dist, group=group_handle, pgrid=pgrid)
     252             :          CALL dbcsr_create_dist_block_cyclic(bc_dist, &
     253             :                                              nrows=nfullrows_total, ncolumns=nfullcols_total, &
     254             :                                              nrow_block=nrow_block, ncol_block=ncol_block, &
     255             :                                              group_handle=group_handle, pgrid=pgrid, &
     256      995607 :                                              row_blk_sizes=row_blk_size, col_blk_sizes=col_blk_size)
     257             : 
     258             :          CALL dbcsr_create(bc_mat, "Block-cyclic"//name, bc_dist, &
     259             :                            dbcsr_type_no_symmetry, row_blk_size, col_blk_size, &
     260             :                            nze=0, data_type=dbcsr_get_data_type(matrix), &
     261      995607 :                            reuse_arrays=.TRUE.)
     262      995607 :          CALL dbcsr_distribution_release(bc_dist)
     263             : 
     264      995607 :          CALL dbcsr_create(matrix_nosym, template=matrix, matrix_type="N")
     265      995607 :          CALL dbcsr_desymmetrize(matrix, matrix_nosym)
     266      995607 :          CALL dbcsr_complete_redistribute(matrix_nosym, bc_mat)
     267      995607 :          CALL dbcsr_release(matrix_nosym)
     268             : 
     269      995607 :          CALL copy_dbcsr_to_${fm}$_bc(bc_mat, fm)
     270             : 
     271      995607 :          CALL dbcsr_release(bc_mat)
     272             : 
     273      995607 :          CALL timestop(handle)
     274      995607 :       END SUBROUTINE copy_dbcsr_to_${fm}$
     275             : 
     276             : ! **************************************************************************************************
     277             : !> \brief Copy a DBCSR_BLACS matrix to a BLACS matrix
     278             : !> \param bc_mat DBCSR matrix
     279             : !> \param[out] fm             full matrix
     280             : ! **************************************************************************************************
     281      995607 :       SUBROUTINE copy_dbcsr_to_${fm}$_bc(bc_mat, fm)
     282             :          TYPE(dbcsr_type), INTENT(IN)                       :: bc_mat
     283             :          TYPE(cp_${fm}$_type), INTENT(IN)                   :: fm
     284             : 
     285             :          CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_dbcsr_to_${fm}$_bc'
     286             : 
     287             :          INTEGER :: col, handle, row
     288      995607 :          INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_col, first_row, last_col, last_row
     289      995607 :          ${type}$ (KIND=dp), DIMENSION(:, :), POINTER       :: dbcsr_block, fm_block
     290             :          TYPE(dbcsr_iterator_type)                          :: iter
     291             : 
     292      995607 :          CALL timeset(routineN, handle)
     293             : 
     294             :          #:if (type=="REAL")
     295      993069 :             IF (fm%use_sp) CPABORT("copy_dbcsr_to_${fm}$_bc: single precision not supported")
     296             :          #:endif
     297             : 
     298      995607 :          CALL calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     299             : 
     300             :          ! Now copy data to the FM matrix
     301      995607 :          fm_block => fm%local_data
     302   405349905 :          fm_block = ${constr}$ (0.0, KIND=dp)
     303             : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(iter, row, col, dbcsr_block) &
     304      995607 : !$OMP SHARED(bc_mat, last_row, first_row, last_col, first_col, fm_block)
     305             :          CALL dbcsr_iterator_start(iter, bc_mat)
     306             :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     307             :             CALL dbcsr_iterator_next_block(iter, row, col, dbcsr_block)
     308             :             fm_block(first_row(row):last_row(row), first_col(col):last_col(col)) = dbcsr_block(:, :)
     309             :          END DO
     310             :          CALL dbcsr_iterator_stop(iter)
     311             : !$OMP END PARALLEL
     312             : 
     313      995607 :          CALL timestop(handle)
     314     1991214 :       END SUBROUTINE copy_dbcsr_to_${fm}$_bc
     315             : 
     316             :    #:endfor
     317             : 
     318             : ! **************************************************************************************************
     319             : !> \brief Helper routine used to copy blocks from DBCSR into FM matrices and vice versa
     320             : !> \param bc_mat ...
     321             : !> \param first_row ...
     322             : !> \param last_row ...
     323             : !> \param first_col ...
     324             : !> \param last_col ...
     325             : !> \author Ole Schuett
     326             : ! **************************************************************************************************
     327     2052336 :    SUBROUTINE calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     328             :       TYPE(dbcsr_type), INTENT(IN)                    :: bc_mat
     329             :       INTEGER :: col, nblkcols_local, nblkcols_total, nblkrows_local, nblkrows_total, row
     330             :       INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: first_col, first_row, last_col, &
     331             :                                                             last_row
     332             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_col_sizes, local_row_sizes
     333     2052336 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, local_cols, local_rows, &
     334     2052336 :                                                             row_blk_size
     335             : 
     336             :       CALL dbcsr_get_info(bc_mat, &
     337             :                           nblkrows_total=nblkrows_total, &
     338             :                           nblkcols_total=nblkcols_total, &
     339             :                           nblkrows_local=nblkrows_local, &
     340             :                           nblkcols_local=nblkcols_local, &
     341             :                           local_rows=local_rows, &
     342             :                           local_cols=local_cols, &
     343             :                           row_blk_size=row_blk_size, &
     344     2052336 :                           col_blk_size=col_blk_size)
     345             : 
     346             :       ! calculate first_row and last_row
     347     6156464 :       ALLOCATE (local_row_sizes(nblkrows_total))
     348     6520436 :       local_row_sizes(:) = 0
     349     2052336 :       IF (nblkrows_local .GE. 1) THEN
     350     4057139 :          DO row = 1, nblkrows_local
     351     4057139 :             local_row_sizes(local_rows(row)) = row_blk_size(local_rows(row))
     352             :          END DO
     353             :       END IF
     354    10259504 :       ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total))
     355     2052336 :       CALL dbcsr_convert_sizes_to_offsets(local_row_sizes, first_row, last_row)
     356     2052336 :       DEALLOCATE (local_row_sizes)
     357             : 
     358             :       ! calculate first_col and last_col
     359     6155226 :       ALLOCATE (local_col_sizes(nblkcols_total))
     360     9093571 :       local_col_sizes(:) = 0
     361     2052336 :       IF (nblkcols_local .GE. 1) THEN
     362     9091789 :          DO col = 1, nblkcols_local
     363     9091789 :             local_col_sizes(local_cols(col)) = col_blk_size(local_cols(col))
     364             :          END DO
     365             :       END IF
     366    10254552 :       ALLOCATE (first_col(nblkcols_total), last_col(nblkcols_total))
     367     2052336 :       CALL dbcsr_convert_sizes_to_offsets(local_col_sizes, first_col, last_col)
     368     2052336 :       DEALLOCATE (local_col_sizes)
     369             : 
     370     2052336 :    END SUBROUTINE calculate_fm_block_ranges
     371             : 
     372             : ! **************************************************************************************************
     373             : !> \brief hack for dbcsr_copy_columns
     374             : !> \param matrix_b ...
     375             : !> \param matrix_a ...
     376             : !> \param ncol ...
     377             : !> \param source_start ...
     378             : !> \param target_start ...
     379             : !> \param para_env ...
     380             : !> \param blacs_env ...
     381             : !> \author vw
     382             : ! **************************************************************************************************
     383        9016 :    SUBROUTINE dbcsr_copy_columns_hack(matrix_b, matrix_a, &
     384             :                                       ncol, source_start, target_start, para_env, blacs_env)
     385             : 
     386             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix_b
     387             :       TYPE(dbcsr_type), INTENT(IN)                    :: matrix_a
     388             :       INTEGER, INTENT(IN)                                :: ncol, source_start, target_start
     389             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     390             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     391             : 
     392             :       INTEGER                                            :: nfullcols_total, nfullrows_total
     393             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     394             :       TYPE(cp_fm_type)                         :: fm_matrix_a, fm_matrix_b
     395             : 
     396        2254 :       NULLIFY (fm_struct)
     397        2254 :       CALL dbcsr_get_info(matrix_a, nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
     398             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nfullrows_total, &
     399        2254 :                                ncol_global=nfullcols_total, para_env=para_env)
     400        2254 :       CALL cp_fm_create(fm_matrix_a, fm_struct, name="fm_matrix_a")
     401        2254 :       CALL cp_fm_struct_release(fm_struct)
     402             : 
     403        2254 :       CALL dbcsr_get_info(matrix_b, nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
     404             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nfullrows_total, &
     405        2254 :                                ncol_global=nfullcols_total, para_env=para_env)
     406        2254 :       CALL cp_fm_create(fm_matrix_b, fm_struct, name="fm_matrix_b")
     407        2254 :       CALL cp_fm_struct_release(fm_struct)
     408             : 
     409        2254 :       CALL copy_dbcsr_to_fm(matrix_a, fm_matrix_a)
     410        2254 :       CALL copy_dbcsr_to_fm(matrix_b, fm_matrix_b)
     411             : 
     412        2254 :       CALL cp_fm_to_fm(fm_matrix_a, fm_matrix_b, ncol, source_start, target_start)
     413             : 
     414        2254 :       CALL copy_fm_to_dbcsr(fm_matrix_b, matrix_b)
     415             : 
     416        2254 :       CALL cp_fm_release(fm_matrix_a)
     417        2254 :       CALL cp_fm_release(fm_matrix_b)
     418             : 
     419        2254 :    END SUBROUTINE dbcsr_copy_columns_hack
     420             : 
     421             : ! **************************************************************************************************
     422             : !> \brief Creates a DBCSR distribution from a distribution_2d
     423             : !> \param[in] dist2d          distribution_2d
     424             : !> \param[out] dist           DBCSR distribution
     425             : !> \par History
     426             : !>    move form dbcsr_operation 01.2010
     427             : ! **************************************************************************************************
     428        9496 :    SUBROUTINE cp_dbcsr_dist2d_to_dist(dist2d, dist)
     429             :       TYPE(distribution_2d_type), INTENT(IN), TARGET     :: dist2d
     430             :       TYPE(dbcsr_distribution_type), INTENT(OUT)          :: dist
     431             : 
     432        9496 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid, col_dist_data, row_dist_data
     433             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     434             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     435             :       TYPE(distribution_2d_type), POINTER                :: dist2d_p
     436        9496 :       INTEGER, DIMENSION(:), POINTER         :: row_dist, col_dist
     437             : 
     438        9496 :       dist2d_p => dist2d
     439             :       CALL distribution_2d_get(dist2d_p, &
     440             :                                row_distribution=row_dist_data, &
     441             :                                col_distribution=col_dist_data, &
     442        9496 :                                blacs_env=blacs_env)
     443        9496 :       CALL blacs_env%get(para_env=para_env, blacs2mpi=pgrid)
     444             : 
     445             :       ! map to 1D arrays
     446        9496 :       row_dist => row_dist_data(:, 1)
     447        9496 :       col_dist => col_dist_data(:, 1)
     448             :       !row_cluster => row_dist_data(:, 2)
     449             :       !col_cluster => col_dist_data(:, 2)
     450             : 
     451             :       CALL dbcsr_distribution_new(dist, &
     452             :                                   group=para_env%get_handle(), pgrid=pgrid, &
     453             :                                   row_dist=row_dist, &
     454        9496 :                                   col_dist=col_dist)
     455             : 
     456        9496 :    END SUBROUTINE cp_dbcsr_dist2d_to_dist
     457             : 
     458             : ! **************************************************************************************************
     459             : !> \brief multiply a dbcsr with a replicated array
     460             : !>        c = alpha_scalar * A (dbscr) * b + c
     461             : !> \param[in] matrix_a DBSCR matrxx
     462             : !> \param[in]  vec_b        vectors b
     463             : !> \param[inout] vec_c      vectors c
     464             : !> \param[in]  ncol         nbr of columns
     465             : !> \param[in]  alpha        alpha
     466             : !>
     467             : ! **************************************************************************************************
     468           0 :    SUBROUTINE dbcsr_multiply_local(matrix_a, vec_b, vec_c, ncol, alpha)
     469             :       TYPE(dbcsr_type), INTENT(IN)                    :: matrix_a
     470             :       REAL(dp), DIMENSION(:, :), INTENT(IN)              :: vec_b
     471             :       REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: vec_c
     472             :       INTEGER, INTENT(in), OPTIONAL                      :: ncol
     473             :       REAL(dp), INTENT(IN), OPTIONAL                     :: alpha
     474             : 
     475             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_multiply_local'
     476             : 
     477             :       INTEGER                                            :: blk, col, coloff, my_ncol, row, rowoff, &
     478             :                                                             timing_handle
     479             :       LOGICAL                                            :: has_symm
     480             :       REAL(dp)                                           :: my_alpha, my_alpha2
     481           0 :       REAL(dp), DIMENSION(:, :), POINTER                 :: data_d
     482             :       TYPE(dbcsr_iterator_type)                            :: iter
     483             : 
     484           0 :       CALL timeset(routineN, timing_handle)
     485             : 
     486           0 :       my_alpha = 1.0_dp
     487           0 :       IF (PRESENT(alpha)) my_alpha = alpha
     488             : 
     489           0 :       my_ncol = SIZE(vec_b, 2)
     490           0 :       IF (PRESENT(ncol)) my_ncol = ncol
     491             : 
     492           0 :       my_alpha2 = 0.0_dp
     493           0 :       IF (dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_symmetric) my_alpha2 = my_alpha
     494           0 :       IF (dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_antisymmetric) my_alpha2 = -my_alpha
     495             : 
     496             :       has_symm = (dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_symmetric .OR. &
     497           0 :                   dbcsr_get_matrix_type(matrix_a) .EQ. dbcsr_type_antisymmetric)
     498             : 
     499             : !$OMP     PARALLEL DEFAULT(NONE) SHARED(matrix_a,vec_b,vec_c,ncol,my_alpha2,my_alpha,my_ncol,has_symm) &
     500           0 : !$OMP              PRIVATE(iter,row,col,data_d,blk,rowoff,coloff)
     501             :       CALL dbcsr_iterator_start(iter, matrix_a, read_only=.TRUE., dynamic=.TRUE., dynamic_byrows=.TRUE.)
     502             :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     503             :          CALL dbcsr_iterator_next_block(iter, row, col, data_d, blk, row_offset=rowoff, col_offset=coloff)
     504             :          IF (my_ncol .NE. 1) THEN
     505             :             CALL dgemm('N', 'N', &
     506             :                        SIZE(data_d, 1), my_ncol, SIZE(data_d, 2), &
     507             :                        my_alpha, data_d(1, 1), SIZE(data_d, 1), &
     508             :                        vec_b(coloff, 1), SIZE(vec_b, 1), &
     509             :                        1.0_dp, vec_c(rowoff, 1), SIZE(vec_c, 1))
     510             :          ELSE
     511             :             CALL dgemv('N', SIZE(data_d, 1), SIZE(data_d, 2), &
     512             :                        my_alpha, data_d(1, 1), SIZE(data_d, 1), &
     513             :                        vec_b(coloff, 1), 1, &
     514             :                        1.0_dp, vec_c(rowoff, 1), 1)
     515             :          END IF
     516             :       END DO
     517             :       CALL dbcsr_iterator_stop(iter)
     518             : !$OMP     END PARALLEL
     519             : 
     520             :       ! FIXME ... in the symmetric case, the writes to vec_c depend on the column, not the row. This makes OMP-ing more difficult
     521             :       ! needs e.g. a buffer for vec_c and a reduction of that buffer.
     522           0 :       IF (has_symm) THEN
     523           0 :          CALL dbcsr_iterator_start(iter, matrix_a)
     524           0 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     525           0 :             CALL dbcsr_iterator_next_block(iter, row, col, data_d, blk, row_offset=rowoff, col_offset=coloff)
     526           0 :             IF (row .NE. col) THEN
     527           0 :                IF (my_ncol .NE. 1) THEN
     528             :                   CALL dgemm('T', 'N', &
     529             :                              SIZE(data_d, 2), my_ncol, SIZE(data_d, 1), &
     530             :                              my_alpha2, data_d(1, 1), SIZE(data_d, 1), &
     531             :                              vec_b(rowoff, 1), SIZE(vec_b, 1), &
     532           0 :                              1.0_dp, vec_c(coloff, 1), SIZE(vec_c, 1))
     533             :                ELSE
     534             :                   CALL dgemv('T', SIZE(data_d, 1), SIZE(data_d, 2), &
     535             :                              my_alpha2, data_d(1, 1), SIZE(data_d, 1), &
     536             :                              vec_b(rowoff, 1), 1, &
     537           0 :                              1.0_dp, vec_c(coloff, 1), 1)
     538             :                END IF
     539             :             END IF
     540             :          END DO
     541           0 :          CALL dbcsr_iterator_stop(iter)
     542             :       END IF
     543             : 
     544           0 :       CALL timestop(timing_handle)
     545           0 :    END SUBROUTINE dbcsr_multiply_local
     546             : 
     547             : ! **************************************************************************************************
     548             : !> \brief multiply a dbcsr with a fm matrix
     549             : !>
     550             : !> For backwards compatibility with BLAS XGEMM, this routine supports
     551             : !> the multiplication of matrices with incompatible dimensions.
     552             : !>
     553             : !> \param[in]  matrix         DBCSR matrix
     554             : !> \param fm_in full matrix
     555             : !> \param fm_out full matrix
     556             : !> \param[in]  ncol           nbr of columns
     557             : !> \param[in]  alpha          alpha
     558             : !> \param[in]  beta           beta
     559             : !>
     560             : ! **************************************************************************************************
     561     2669622 :    SUBROUTINE cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
     562             :       TYPE(dbcsr_type), INTENT(IN)                    :: matrix
     563             :       TYPE(cp_fm_type), INTENT(IN)                          :: fm_in, fm_out
     564             :       INTEGER, INTENT(IN)                                :: ncol
     565             :       REAL(dp), INTENT(IN), OPTIONAL                     :: alpha, beta
     566             : 
     567             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_sm_fm_multiply'
     568             : 
     569             :       INTEGER                                            :: k_in, k_out, timing_handle, &
     570             :                                                             timing_handle_mult, &
     571             :                                                             a_ncol, a_nrow, b_ncol, b_nrow, c_ncol, c_nrow
     572      444937 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_right_in, &
     573      444937 :                                                             col_blk_size_right_out, row_blk_size, &
     574             :                                                             !row_cluster, col_cluster,&
     575      444937 :                                                             row_dist, col_dist, col_blk_size
     576             :       TYPE(dbcsr_type)                                   :: in, out
     577             :       REAL(dp)                                           :: my_alpha, my_beta
     578             :       TYPE(dbcsr_distribution_type)                       :: dist, dist_right_in, product_dist
     579             : 
     580      444937 :       CALL timeset(routineN, timing_handle)
     581             : 
     582      444937 :       my_alpha = 1.0_dp
     583      444937 :       my_beta = 0.0_dp
     584      444937 :       IF (PRESENT(alpha)) my_alpha = alpha
     585      444937 :       IF (PRESENT(beta)) my_beta = beta
     586             : 
     587             :       ! TODO
     588      444937 :       CALL cp_fm_get_info(fm_in, ncol_global=b_ncol, nrow_global=b_nrow)
     589      444937 :       CALL cp_fm_get_info(fm_out, ncol_global=c_ncol, nrow_global=c_nrow)
     590      444937 :       CALL dbcsr_get_info(matrix, nfullrows_total=a_nrow, nfullcols_total=a_ncol)
     591             :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: A ", a_nrow, "x", a_ncol
     592             :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: B ", b_nrow, "x", b_ncol
     593             :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: C ", c_nrow, "x", c_ncol
     594             : 
     595      444937 :       CALL cp_fm_get_info(fm_out, ncol_global=k_out)
     596             : 
     597      444937 :       CALL cp_fm_get_info(fm_in, ncol_global=k_in)
     598             :       !write(*,*)routineN//" -----------------------------------"
     599             :       !IF (k_in .NE. k_out) &
     600             :       !   WRITE(*,'(3(A,I5,1X),2(A,F5.2,1X))')&
     601             :       !   routineN//" ncol", ncol,'k_in',k_in,'k_out',k_out,&
     602             :       !   'alpha',my_alpha,'beta',my_beta
     603             : 
     604      444937 :       IF (ncol .GT. 0 .AND. k_out .GT. 0 .AND. k_in .GT. 0) THEN
     605      443907 :          CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, col_blk_size=col_blk_size, distribution=dist)
     606      443907 :          CALL dbcsr_create_dist_r_unrot(dist_right_in, dist, k_in, col_blk_size_right_in)
     607             : 
     608             :          CALL dbcsr_create(in, "D", dist_right_in, dbcsr_type_no_symmetry, &
     609      443907 :                            col_blk_size, col_blk_size_right_in, nze=0)
     610             : 
     611      443907 :          CALL dbcsr_distribution_get(dist, row_dist=row_dist)
     612      443907 :          CALL dbcsr_distribution_get(dist_right_in, col_dist=col_dist)
     613             :          CALL dbcsr_distribution_new(product_dist, template=dist, &
     614      443907 :                                      row_dist=row_dist, col_dist=col_dist)
     615     1331721 :          ALLOCATE (col_blk_size_right_out(SIZE(col_blk_size_right_in)))
     616     1803696 :          col_blk_size_right_out = col_blk_size_right_in
     617      443907 :          CALL match_col_sizes(col_blk_size_right_out, col_blk_size_right_in, k_out)
     618             : 
     619             :          !if (k_in .ne. k_out) then
     620             :          !   write(*,*)routineN//" in cs", col_blk_size_right_in
     621             :          !   write(*,*)routineN//" out cs", col_blk_size_right_out
     622             :          !endif
     623             : 
     624             :          CALL dbcsr_create(out, "D", product_dist, dbcsr_type_no_symmetry, &
     625      443907 :                            row_blk_size, col_blk_size_right_out, nze=0)
     626             : 
     627      443907 :          CALL copy_fm_to_dbcsr(fm_in, in)
     628      443907 :          IF (ncol .NE. k_out .OR. my_beta .NE. 0.0_dp) &
     629       99926 :             CALL copy_fm_to_dbcsr(fm_out, out)
     630             : 
     631      443907 :          CALL timeset(routineN//'_core', timing_handle_mult)
     632             :          CALL dbcsr_multiply("N", "N", my_alpha, matrix, in, my_beta, out, &
     633      443907 :                              last_column=ncol)
     634      443907 :          CALL timestop(timing_handle_mult)
     635             : 
     636      443907 :          CALL copy_dbcsr_to_fm(out, fm_out)
     637             : 
     638      443907 :          CALL dbcsr_release(in)
     639      443907 :          CALL dbcsr_release(out)
     640      443907 :          DEALLOCATE (col_blk_size_right_in, col_blk_size_right_out)
     641      443907 :          CALL dbcsr_distribution_release(dist_right_in)
     642     1775628 :          CALL dbcsr_distribution_release(product_dist)
     643             : 
     644             :       END IF
     645             : 
     646      444937 :       CALL timestop(timing_handle)
     647             : 
     648      444937 :    END SUBROUTINE cp_dbcsr_sm_fm_multiply
     649             : 
     650             : ! **************************************************************************************************
     651             : !> \brief ...
     652             : !> \param sizes1 ...
     653             : !> \param sizes2 ...
     654             : !> \param full_num ...
     655             : ! **************************************************************************************************
     656      443907 :    SUBROUTINE match_col_sizes(sizes1, sizes2, full_num)
     657             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: sizes1
     658             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: sizes2
     659             :       INTEGER, INTENT(IN)                                :: full_num
     660             : 
     661             :       INTEGER                                            :: left, n1, n2, p, rm, used
     662             : 
     663      443907 :       n1 = SIZE(sizes1)
     664      443907 :       n2 = SIZE(sizes2)
     665      443907 :       IF (n1 .NE. n2) &
     666           0 :          CPABORT("distributions must be equal!")
     667      901848 :       sizes1(1:n1) = sizes2(1:n1)
     668      901848 :       used = SUM(sizes1(1:n1))
     669             :       ! If sizes1 does not cover everything, then we increase the
     670             :       ! size of the last block; otherwise we reduce the blocks
     671             :       ! (from the end) until it is small enough.
     672      443907 :       IF (used .LT. full_num) THEN
     673           0 :          sizes1(n1) = sizes1(n1) + full_num - used
     674             :       ELSE
     675      443907 :          left = used - full_num
     676      443907 :          p = n1
     677      443907 :          DO WHILE (left .GT. 0 .AND. p .GT. 0)
     678           0 :             rm = MIN(left, sizes1(p))
     679           0 :             sizes1(p) = sizes1(p) - rm
     680           0 :             left = left - rm
     681           0 :             p = p - 1
     682             :          END DO
     683             :       END IF
     684      443907 :    END SUBROUTINE match_col_sizes
     685             : 
     686             : ! **************************************************************************************************
     687             : !> \brief performs the multiplication sparse_matrix+dense_mat*dens_mat^T
     688             : !>        if matrix_g is not explicitly given, matrix_v^T will be used
     689             : !>        this can be important to save the necessary redistribute for a
     690             : !>        different matrix_g and increase performance.
     691             : !> \param sparse_matrix ...
     692             : !> \param matrix_v ...
     693             : !> \param matrix_g ...
     694             : !> \param ncol ...
     695             : !> \param alpha ...
     696             : !> \param keep_sparsity Determines if the sparsity of sparse_matrix is retained
     697             : !>        by default it is TRUE
     698             : !> \param symmetry_mode There are the following modes
     699             : !>        1:     sparse_matrix += 0.5*alpha*(v*g^T+g^T*v)    (symmetric update)
     700             : !>        -1:    sparse_matrix += 0.5*alpha*(v*g^T-g^T*v)    (skewsymmetric update)
     701             : !>        else:  sparse_matrix += alpha*v*g^T                (no symmetry, default)
     702             : !>        saves some redistribution steps
     703             : ! **************************************************************************************************
     704      190918 :    SUBROUTINE cp_dbcsr_plus_fm_fm_t(sparse_matrix, matrix_v, matrix_g, ncol, alpha, keep_sparsity, symmetry_mode)
     705             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: sparse_matrix
     706             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_v
     707             :       TYPE(cp_fm_type), OPTIONAL, INTENT(IN)             :: matrix_g
     708             :       INTEGER, INTENT(IN)                                :: ncol
     709             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: alpha
     710             :       LOGICAL, INTENT(IN), OPTIONAL                      :: keep_sparsity
     711             :       INTEGER, INTENT(IN), OPTIONAL :: symmetry_mode
     712             : 
     713             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_plus_fm_fm_t_native'
     714             : 
     715             :       INTEGER                                            :: npcols, k, nao, timing_handle, data_type, my_symmetry_mode
     716      190918 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_left, &
     717      190918 :                                                             col_dist_left, row_blk_size, row_dist
     718             :       LOGICAL                                            :: check_product, my_keep_sparsity
     719             :       REAL(KIND=dp)                                      :: my_alpha, norm
     720             :       TYPE(dbcsr_type)                                :: mat_g, mat_v, sparse_matrix2, &
     721             :                                                          sparse_matrix3
     722             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     723             :       TYPE(cp_fm_type)                          :: fm_matrix
     724             :       TYPE(dbcsr_distribution_type)                       :: dist_left, sparse_dist
     725             : 
     726      190918 :       check_product = .FALSE.
     727             : 
     728      190918 :       CALL timeset(routineN, timing_handle)
     729             : 
     730      190918 :       my_keep_sparsity = .TRUE.
     731      190918 :       IF (PRESENT(keep_sparsity)) my_keep_sparsity = keep_sparsity
     732             : 
     733      190918 :       my_symmetry_mode = 0
     734      190918 :       IF (PRESENT(symmetry_mode)) my_symmetry_mode = symmetry_mode
     735             : 
     736      190918 :       NULLIFY (col_dist_left)
     737             : 
     738      190918 :       IF (ncol .GT. 0) THEN
     739      189438 :          IF (.NOT. dbcsr_valid_index(sparse_matrix)) &
     740           0 :             CPABORT("sparse_matrix must pre-exist")
     741             :          !
     742             :          ! Setup matrix_v
     743      189438 :          CALL cp_fm_get_info(matrix_v, ncol_global=k)
     744             :          !WRITE(*,*)routineN//'truncated mult k, ncol',k,ncol,' PRESENT (matrix_g)',PRESENT (matrix_g)
     745      189438 :          CALL dbcsr_get_info(sparse_matrix, distribution=sparse_dist)
     746      189438 :          CALL dbcsr_distribution_get(sparse_dist, npcols=npcols, row_dist=row_dist)
     747      189438 :          CALL create_bl_distribution(col_dist_left, col_blk_size_left, k, npcols)
     748             :          CALL dbcsr_distribution_new(dist_left, template=sparse_dist, &
     749      189438 :                                      row_dist=row_dist, col_dist=col_dist_left)
     750      189438 :          DEALLOCATE (col_dist_left)
     751      189438 :          CALL dbcsr_get_info(sparse_matrix, row_blk_size=row_blk_size, data_type=data_type)
     752             :          CALL dbcsr_create(mat_v, "DBCSR matrix_v", dist_left, dbcsr_type_no_symmetry, &
     753      189438 :                            row_blk_size, col_blk_size_left, nze=0, data_type=data_type)
     754      189438 :          CALL copy_fm_to_dbcsr(matrix_v, mat_v)
     755      189438 :          CALL dbcsr_verify_matrix(mat_v)
     756             :          !
     757             :          ! Setup matrix_g
     758      189438 :          IF (PRESENT(matrix_g)) THEN
     759             :             CALL dbcsr_create(mat_g, "DBCSR matrix_g", dist_left, dbcsr_type_no_symmetry, &
     760       86565 :                               row_blk_size, col_blk_size_left, data_type=data_type)
     761       86565 :             CALL copy_fm_to_dbcsr(matrix_g, mat_g)
     762             :          END IF
     763             :          !
     764      189438 :          DEALLOCATE (col_blk_size_left)
     765      189438 :          CALL dbcsr_distribution_release(dist_left)
     766             :          !
     767             :          !
     768             :          IF (check_product) THEN
     769             :             CALL cp_fm_get_info(matrix_v, nrow_global=nao)
     770             :             CALL cp_fm_struct_create(fm_struct_tmp, context=matrix_v%matrix_struct%context, nrow_global=nao, &
     771             :                                      ncol_global=nao, para_env=matrix_v%matrix_struct%para_env)
     772             :             CALL cp_fm_create(fm_matrix, fm_struct_tmp, name="fm matrix")
     773             :             CALL cp_fm_struct_release(fm_struct_tmp)
     774             :             CALL copy_dbcsr_to_fm(sparse_matrix, fm_matrix)
     775             :             CALL dbcsr_copy(sparse_matrix3, sparse_matrix)
     776             :          END IF
     777             :          !
     778      189438 :          my_alpha = 1.0_dp
     779      189438 :          IF (PRESENT(alpha)) my_alpha = alpha
     780      189438 :          IF (PRESENT(matrix_g)) THEN
     781       86565 :             IF (my_symmetry_mode == 1) THEN
     782             :                ! Symmetric mode
     783             :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_v, mat_g, &
     784             :                                    1.0_dp, sparse_matrix, &
     785             :                                    retain_sparsity=my_keep_sparsity, &
     786       36314 :                                    last_k=ncol)
     787             :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_g, mat_v, &
     788             :                                    1.0_dp, sparse_matrix, &
     789             :                                    retain_sparsity=my_keep_sparsity, &
     790       36314 :                                    last_k=ncol)
     791       50251 :             ELSE IF (my_symmetry_mode == -1) THEN
     792             :                ! Skewsymmetric mode
     793             :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_v, mat_g, &
     794             :                                    1.0_dp, sparse_matrix, &
     795             :                                    retain_sparsity=my_keep_sparsity, &
     796        2198 :                                    last_k=ncol)
     797             :                CALL dbcsr_multiply("N", "T", -0.5_dp*my_alpha, mat_g, mat_v, &
     798             :                                    1.0_dp, sparse_matrix, &
     799             :                                    retain_sparsity=my_keep_sparsity, &
     800        2198 :                                    last_k=ncol)
     801             :             ELSE
     802             :                ! Normal mode
     803             :                CALL dbcsr_multiply("N", "T", my_alpha, mat_v, mat_g, &
     804             :                                    1.0_dp, sparse_matrix, &
     805             :                                    retain_sparsity=my_keep_sparsity, &
     806       48053 :                                    last_k=ncol)
     807             :             END IF
     808             :          ELSE
     809             :             CALL dbcsr_multiply("N", "T", my_alpha, mat_v, mat_v, &
     810             :                                 1.0_dp, sparse_matrix, &
     811             :                                 retain_sparsity=my_keep_sparsity, &
     812      102873 :                                 last_k=ncol)
     813             :          END IF
     814             : 
     815             :          IF (check_product) THEN
     816             :             IF (PRESENT(matrix_g)) THEN
     817             :                IF (my_symmetry_mode == 1) THEN
     818             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_v, matrix_g, &
     819             :                                   1.0_dp, fm_matrix)
     820             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_g, matrix_v, &
     821             :                                   1.0_dp, fm_matrix)
     822             :                ELSE IF (my_symmetry_mode == -1) THEN
     823             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_v, matrix_g, &
     824             :                                   1.0_dp, fm_matrix)
     825             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, -0.5_dp*my_alpha, matrix_g, matrix_v, &
     826             :                                   1.0_dp, fm_matrix)
     827             :                ELSE
     828             :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, my_alpha, matrix_v, matrix_g, &
     829             :                                   1.0_dp, fm_matrix)
     830             :                END IF
     831             :             ELSE
     832             :                CALL cp_fm_gemm("N", "T", nao, nao, ncol, my_alpha, matrix_v, matrix_v, &
     833             :                                1.0_dp, fm_matrix)
     834             :             END IF
     835             : 
     836             :             CALL dbcsr_copy(sparse_matrix2, sparse_matrix)
     837             :             CALL dbcsr_scale(sparse_matrix2, alpha_scalar=0.0_dp)
     838             :             CALL copy_fm_to_dbcsr(fm_matrix, sparse_matrix2, keep_sparsity=my_keep_sparsity)
     839             :             CALL dbcsr_add(sparse_matrix2, sparse_matrix, alpha_scalar=1.0_dp, &
     840             :                            beta_scalar=-1.0_dp)
     841             :             CALL dbcsr_norm(sparse_matrix2, which_norm=dbcsr_norm_frobenius, &
     842             :                             norm_scalar=norm)
     843             :             WRITE (*, *) 'nao=', nao, ' k=', k, ' ncol=', ncol, ' my_alpha=', my_alpha
     844             :             WRITE (*, *) 'PRESENT (matrix_g)', PRESENT(matrix_g)
     845             :             WRITE (*, *) 'matrix_type=', dbcsr_get_matrix_type(sparse_matrix)
     846             :             WRITE (*, *) 'norm(sm+alpha*v*g^t - fm+alpha*v*g^t)/n=', norm/REAL(nao, dp)
     847             :             IF (norm/REAL(nao, dp) .GT. 1e-12_dp) THEN
     848             :                !WRITE(*,*) 'fm_matrix'
     849             :                !DO j=1,SIZE(fm_matrix%local_data,2)
     850             :                !   DO i=1,SIZE(fm_matrix%local_data,1)
     851             :                !      WRITE(*,'(A,I3,A,I3,A,E26.16,A)') 'a(',i,',',j,')=',fm_matrix%local_data(i,j),';'
     852             :                !   ENDDO
     853             :                !ENDDO
     854             :                !WRITE(*,*) 'mat_v'
     855             :                !CALL dbcsr_print(mat_v,matlab_format=.TRUE.)
     856             :                !WRITE(*,*) 'mat_g'
     857             :                !CALL dbcsr_print(mat_g,matlab_format=.TRUE.)
     858             :                !WRITE(*,*) 'sparse_matrix'
     859             :                !CALL dbcsr_print(sparse_matrix,matlab_format=.TRUE.)
     860             :                !WRITE(*,*) 'sparse_matrix2 (-sm + sparse(fm))'
     861             :                !CALL dbcsr_print(sparse_matrix2,matlab_format=.TRUE.)
     862             :                !WRITE(*,*) 'sparse_matrix3 (copy of sm input)'
     863             :                !CALL dbcsr_print(sparse_matrix3,matlab_format=.TRUE.)
     864             :                !stop
     865             :             END IF
     866             :             CALL dbcsr_release(sparse_matrix2)
     867             :             CALL dbcsr_release(sparse_matrix3)
     868             :             CALL cp_fm_release(fm_matrix)
     869             :          END IF
     870      189438 :          CALL dbcsr_release(mat_v)
     871      189438 :          IF (PRESENT(matrix_g)) CALL dbcsr_release(mat_g)
     872             :       END IF
     873      190918 :       CALL timestop(timing_handle)
     874             : 
     875      190918 :    END SUBROUTINE cp_dbcsr_plus_fm_fm_t
     876             : 
     877             : ! **************************************************************************************************
     878             : !> \brief Utility function to copy a specially shaped fm to dbcsr_matrix
     879             : !>        The result matrix will be the matrix in dbcsr format
     880             : !>        with the row blocks sizes according to the block_sizes of the template
     881             : !>        and the col blocks sizes evenly blocked with the internal dbcsr conversion
     882             : !>        size (32 is the current default)
     883             : !> \param matrix ...
     884             : !> \param fm_in ...
     885             : !> \param template ...
     886             : ! **************************************************************************************************
     887       19904 :    SUBROUTINE cp_fm_to_dbcsr_row_template(matrix, fm_in, template)
     888             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix
     889             :       TYPE(cp_fm_type), INTENT(IN)                    :: fm_in
     890             :       TYPE(dbcsr_type), INTENT(IN)                    :: template
     891             : 
     892             :       INTEGER                                            :: k_in, data_type
     893        4976 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_right_in, row_blk_size
     894             :       TYPE(dbcsr_distribution_type)                       :: tmpl_dist, dist_right_in
     895             : 
     896        4976 :       CALL cp_fm_get_info(fm_in, ncol_global=k_in)
     897             : 
     898        4976 :       CALL dbcsr_get_info(template, distribution=tmpl_dist)
     899        4976 :       CALL dbcsr_create_dist_r_unrot(dist_right_in, tmpl_dist, k_in, col_blk_size_right_in)
     900        4976 :       CALL dbcsr_get_info(template, row_blk_size=row_blk_size, data_type=data_type)
     901             :       CALL dbcsr_create(matrix, "D", dist_right_in, dbcsr_type_no_symmetry, &
     902        4976 :                         row_blk_size, col_blk_size_right_in, nze=0, data_type=data_type)
     903             : 
     904        4976 :       CALL copy_fm_to_dbcsr(fm_in, matrix)
     905        4976 :       DEALLOCATE (col_blk_size_right_in)
     906        4976 :       CALL dbcsr_distribution_release(dist_right_in)
     907             : 
     908        4976 :    END SUBROUTINE cp_fm_to_dbcsr_row_template
     909             : 
     910             : ! **************************************************************************************************
     911             : !> \brief Utility function to create an arbitrary shaped dbcsr matrix
     912             : !>        with the same processor grid as the template matrix
     913             : !>        both row sizes and col sizes are evenly blocked with the internal
     914             : !>        dbcsr_conversion size (32 is the current default)
     915             : !> \param matrix dbcsr matrix to be created
     916             : !> \param template template dbcsr matrix giving its mp_env
     917             : !> \param m global row size of output matrix
     918             : !> \param n global col size of output matrix
     919             : !> \param sym ...
     920             : !> \param data_type ...
     921             : ! **************************************************************************************************
     922      283672 :    SUBROUTINE cp_dbcsr_m_by_n_from_template(matrix, template, m, n, sym, data_type)
     923             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix, template
     924             :       INTEGER, INTENT(IN)                                            :: m, n
     925             :       CHARACTER, OPTIONAL, INTENT(IN)                                :: sym
     926             :       INTEGER, OPTIONAL, INTENT(IN)                    :: data_type
     927             : 
     928             :       CHARACTER                                          :: mysym
     929             :       INTEGER                                            :: my_data_type, nprows, npcols
     930      141836 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, &
     931      141836 :                                                             col_dist, row_blk_size, &
     932      141836 :                                                             row_dist
     933             :       TYPE(dbcsr_distribution_type)                       :: tmpl_dist, dist_m_n
     934             : 
     935             :       CALL dbcsr_get_info(template, &
     936             :                           matrix_type=mysym, &
     937             :                           data_type=my_data_type, &
     938      141836 :                           distribution=tmpl_dist)
     939             : 
     940      141836 :       IF (PRESENT(sym)) mysym = sym
     941      141836 :       IF (PRESENT(data_type)) my_data_type = data_type
     942             : 
     943      141836 :       NULLIFY (row_dist, col_dist)
     944      141836 :       NULLIFY (row_blk_size, col_blk_size)
     945             :       !NULLIFY (row_cluster, col_cluster)
     946             : 
     947      141836 :       CALL dbcsr_distribution_get(tmpl_dist, nprows=nprows, npcols=npcols)
     948      141836 :       CALL create_bl_distribution(row_dist, row_blk_size, m, nprows)
     949      141836 :       CALL create_bl_distribution(col_dist, col_blk_size, n, npcols)
     950             :       CALL dbcsr_distribution_new(dist_m_n, template=tmpl_dist, &
     951             :                                   row_dist=row_dist, col_dist=col_dist, &
     952             :                                   !row_cluster=row_cluster, col_cluster=col_cluster, &
     953      141836 :                                   reuse_arrays=.TRUE.)
     954             : 
     955             :       CALL dbcsr_create(matrix, "m_n_template", dist_m_n, mysym, &
     956             :                         row_blk_size, col_blk_size, nze=0, data_type=my_data_type, &
     957      141836 :                         reuse_arrays=.TRUE.)
     958      141836 :       CALL dbcsr_distribution_release(dist_m_n)
     959             : 
     960      141836 :    END SUBROUTINE cp_dbcsr_m_by_n_from_template
     961             : 
     962             : ! **************************************************************************************************
     963             : !> \brief Utility function to create dbcsr matrix, m x n matrix (n arbitrary)
     964             : !>        with the same processor grid and row distribution  as the template matrix
     965             : !>        col sizes are evenly blocked with the internal
     966             : !>        dbcsr_conversion size (32 is the current default)
     967             : !> \param matrix dbcsr matrix to be created
     968             : !> \param template template dbcsr matrix giving its mp_env
     969             : !> \param n global col size of output matrix
     970             : !> \param sym ...
     971             : !> \param data_type ...
     972             : ! **************************************************************************************************
     973      504692 :    SUBROUTINE cp_dbcsr_m_by_n_from_row_template(matrix, template, n, sym, data_type)
     974             :       TYPE(dbcsr_type), INTENT(INOUT)                 :: matrix, template
     975             :       INTEGER                                            :: n
     976             :       CHARACTER, OPTIONAL                                :: sym
     977             :       INTEGER, OPTIONAL                                  :: data_type
     978             : 
     979             :       CHARACTER                                          :: mysym
     980             :       INTEGER                                            :: my_data_type, npcols
     981      126173 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
     982      126173 :                                                             row_dist
     983             :       TYPE(dbcsr_distribution_type)                       :: dist_m_n, tmpl_dist
     984             : 
     985      252346 :       mysym = dbcsr_get_matrix_type(template)
     986      126173 :       IF (PRESENT(sym)) mysym = sym
     987      126173 :       my_data_type = dbcsr_get_data_type(template)
     988      126173 :       IF (PRESENT(data_type)) my_data_type = data_type
     989             : 
     990      126173 :       CALL dbcsr_get_info(template, distribution=tmpl_dist)
     991             :       CALL dbcsr_distribution_get(tmpl_dist, &
     992             :                                   npcols=npcols, &
     993      126173 :                                   row_dist=row_dist)
     994             : 
     995      126173 :       NULLIFY (col_dist, col_blk_size)
     996      126173 :       CALL create_bl_distribution(col_dist, col_blk_size, n, npcols)
     997             :       CALL dbcsr_distribution_new(dist_m_n, template=tmpl_dist, &
     998      126173 :                                   row_dist=row_dist, col_dist=col_dist)
     999             : 
    1000      126173 :       CALL dbcsr_get_info(template, row_blk_size=row_blk_size)
    1001             :       CALL dbcsr_create(matrix, "m_n_template", dist_m_n, mysym, &
    1002      126173 :                         row_blk_size, col_blk_size, nze=0, data_type=my_data_type)
    1003             : 
    1004      126173 :       DEALLOCATE (col_dist, col_blk_size)
    1005      126173 :       CALL dbcsr_distribution_release(dist_m_n)
    1006             : 
    1007      126173 :    END SUBROUTINE cp_dbcsr_m_by_n_from_row_template
    1008             : 
    1009             : ! **************************************************************************************************
    1010             : !> \brief Distributes elements into blocks and into bins
    1011             : !>
    1012             : !> \param[out] block_distribution       block distribution to bins
    1013             : !> \param[out] block_size       sizes of blocks
    1014             : !> \param[in] nelements number of elements to bin
    1015             : !> \param[in] nbins             number of bins
    1016             : !> \par Term clarification
    1017             : !>      An example: blocks are atom blocks and bins are process rows/columns.
    1018             : ! **************************************************************************************************
    1019     1048166 :    SUBROUTINE create_bl_distribution(block_distribution, &
    1020             :                                      block_size, nelements, nbins)
    1021             :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: block_distribution, block_size
    1022             :       INTEGER, INTENT(IN)                                :: nelements, nbins
    1023             : 
    1024             :       CHARACTER(len=*), PARAMETER :: routineN = 'create_bl_distribution', &
    1025             :                                      routineP = moduleN//':'//routineN
    1026             : 
    1027             :       INTEGER                                            :: bin, blk_layer, element_stack, els, &
    1028             :                                                             estimated_blocks, max_blocks_per_bin, &
    1029             :                                                             nblks, nblocks, stat
    1030     1048166 :       INTEGER, DIMENSION(:), POINTER                     :: blk_dist, blk_sizes
    1031             : 
    1032             : !   ---------------------------------------------------------------------------
    1033             : 
    1034     1048166 :       NULLIFY (block_distribution)
    1035     1048166 :       NULLIFY (block_size)
    1036             :       ! Define the sizes on which we build the distribution.
    1037     1048166 :       IF (nelements .GT. 0) THEN
    1038             : 
    1039     1039444 :          nblocks = CEILING(REAL(nelements, KIND=dp)/REAL(max_elements_per_block, KIND=dp))
    1040     1039444 :          max_blocks_per_bin = CEILING(REAL(nblocks, KIND=dp)/REAL(nbins, KIND=dp))
    1041             : 
    1042             :          IF (debug_mod) THEN
    1043             :             WRITE (*, '(1X,A,1X,A,I7,A,I7,A)') routineP, "For", nelements, &
    1044             :                " elements and", nbins, " bins"
    1045             :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1046             :                max_elements_per_block, " max elements per block"
    1047             :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1048             :                nblocks, " blocks"
    1049             :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1050             :                max_blocks_per_bin, " max blocks/bin"
    1051             :          END IF
    1052             : 
    1053     1039444 :          estimated_blocks = max_blocks_per_bin*nbins
    1054     3118332 :          ALLOCATE (blk_dist(estimated_blocks), stat=stat)
    1055     1039444 :          IF (stat /= 0) &
    1056           0 :             CPABORT("blk_dist")
    1057     2078888 :          ALLOCATE (blk_sizes(estimated_blocks), stat=stat)
    1058             :          IF (stat /= 0) &
    1059           0 :             CPABORT("blk_sizes")
    1060     1039444 :          element_stack = 0
    1061     1039444 :          nblks = 0
    1062     2140380 :          DO blk_layer = 1, max_blocks_per_bin
    1063     3364424 :             DO bin = 0, nbins - 1
    1064     1224044 :                els = MIN(max_elements_per_block, nelements - element_stack)
    1065     2324980 :                IF (els .GT. 0) THEN
    1066     1112352 :                   element_stack = element_stack + els
    1067     1112352 :                   nblks = nblks + 1
    1068     1112352 :                   blk_dist(nblks) = bin
    1069     1112352 :                   blk_sizes(nblks) = els
    1070             :                   IF (debug_mod) WRITE (*, '(1X,A,I5,A,I5,A,I5)') routineP//" Assigning", &
    1071             :                      els, " elements as block", nblks, " to bin", bin
    1072             :                END IF
    1073             :             END DO
    1074             :          END DO
    1075             :          ! Create the output arrays.
    1076     1039444 :          IF (nblks .EQ. estimated_blocks) THEN
    1077      927752 :             block_distribution => blk_dist
    1078      927752 :             block_size => blk_sizes
    1079             :          ELSE
    1080      335076 :             ALLOCATE (block_distribution(nblks), stat=stat)
    1081             :             IF (stat /= 0) &
    1082           0 :                CPABORT("blk_dist")
    1083      452288 :             block_distribution(:) = blk_dist(1:nblks)
    1084      111692 :             DEALLOCATE (blk_dist)
    1085      223384 :             ALLOCATE (block_size(nblks), stat=stat)
    1086             :             IF (stat /= 0) &
    1087           0 :                CPABORT("blk_sizes")
    1088      452288 :             block_size(:) = blk_sizes(1:nblks)
    1089      111692 :             DEALLOCATE (blk_sizes)
    1090             :          END IF
    1091             :       ELSE
    1092        8722 :          ALLOCATE (block_distribution(0), stat=stat)
    1093             :          IF (stat /= 0) &
    1094           0 :             CPABORT("blk_dist")
    1095        8722 :          ALLOCATE (block_size(0), stat=stat)
    1096             :          IF (stat /= 0) &
    1097           0 :             CPABORT("blk_sizes")
    1098             :       END IF
    1099             : 1579  FORMAT(I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5)
    1100             :       IF (debug_mod) THEN
    1101             :          WRITE (*, '(1X,A,A)') routineP//" Distribution"
    1102             :          WRITE (*, 1579) block_distribution(:)
    1103             :          WRITE (*, '(1X,A,A)') routineP//" Sizes"
    1104             :          WRITE (*, 1579) block_size(:)
    1105             :       END IF
    1106     1048166 :    END SUBROUTINE create_bl_distribution
    1107             : 
    1108             : ! **************************************************************************************************
    1109             : !> \brief Creates a new distribution for the right matrix in a matrix
    1110             : !>        multiplication with unrotated grid.
    1111             : !> \param[out] dist_right     new distribution for the right matrix
    1112             : !> \param[in] dist_left       the distribution of the left matrix
    1113             : !> \param[in] ncolumns        number of columns in right matrix
    1114             : !> \param[out] right_col_blk_sizes      sizes of blocks in the created column
    1115             : !> \par The new row distribution for the right matrix is the same as the row
    1116             : !>      distribution of the left matrix, while the column distribution is
    1117             : !>      created so that it is appropriate to the parallel environment.
    1118             : ! **************************************************************************************************
    1119      448883 :    SUBROUTINE dbcsr_create_dist_r_unrot(dist_right, dist_left, ncolumns, &
    1120             :                                         right_col_blk_sizes)
    1121             :       TYPE(dbcsr_distribution_type), INTENT(OUT)          :: dist_right
    1122             :       TYPE(dbcsr_distribution_type), INTENT(IN)           :: dist_left
    1123             :       INTEGER, INTENT(IN)                                :: ncolumns
    1124             :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: right_col_blk_sizes
    1125             : 
    1126             :       INTEGER                                            :: multiplicity, nimages, ncols, nprows, npcols
    1127             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: tmp_images
    1128      448883 :       INTEGER, DIMENSION(:), POINTER                     :: right_col_dist, right_row_dist, old_col_dist, dummy
    1129             : 
    1130             :       CALL dbcsr_distribution_get(dist_left, &
    1131             :                                   ncols=ncols, &
    1132             :                                   col_dist=old_col_dist, &
    1133             :                                   nprows=nprows, &
    1134      448883 :                                   npcols=npcols)
    1135             : 
    1136             :       ! Create the column distribution
    1137      448883 :       CALL create_bl_distribution(right_col_dist, right_col_blk_sizes, ncolumns, npcols)
    1138             :       ! Create an even row distribution.
    1139     1795532 :       ALLOCATE (right_row_dist(ncols), tmp_images(ncols))
    1140      448883 :       nimages = lcm(nprows, npcols)/nprows
    1141      448883 :       multiplicity = nprows/gcd(nprows, npcols)
    1142      448883 :       CALL rebin_distribution(right_row_dist, tmp_images, old_col_dist, nprows, multiplicity, nimages)
    1143             : 
    1144      448883 :       NULLIFY (dummy)
    1145             :       CALL dbcsr_distribution_new(dist_right, &
    1146             :                                   template=dist_left, &
    1147             :                                   row_dist=right_row_dist, &
    1148             :                                   col_dist=right_col_dist, &
    1149             :                                   !row_cluster=dummy,&
    1150             :                                   !col_cluster=dummy,&
    1151      448883 :                                   reuse_arrays=.TRUE.)
    1152      448883 :       DEALLOCATE (tmp_images)
    1153      448883 :    END SUBROUTINE dbcsr_create_dist_r_unrot
    1154             : 
    1155             : ! **************************************************************************************************
    1156             : !> \brief Makes new distribution with decimation and multiplicity
    1157             : !> \param[out] new_bins      new real distribution
    1158             : !> \param[out] images        new image distribution
    1159             : !> \param[in] source_bins    Basis for the new distribution and images
    1160             : !> \param[in] nbins          number of bins in the new real distribution
    1161             : !> \param[in] multiplicity   multiplicity
    1162             : !> \param[in] nimages        number of images in the new distribution
    1163             : !> \par Definition of multiplicity and nimages
    1164             : !>      Multiplicity and decimation (number of images) are used to
    1165             : !>      match process grid coordinates on non-square process
    1166             : !>      grids. Given source_nbins and target_nbins, their relation is
    1167             : !>                source_nbins * target_multiplicity
    1168             : !>              = target_nbins * target_nimages.
    1169             : !>      It is best when both multiplicity and nimages are small. To
    1170             : !>      get these two factors, then, one can use the following formulas:
    1171             : !>          nimages      = lcm(source_nbins, target_nbins) / target_nbins
    1172             : !>          multiplicity = target_nbins / gcd(source_nbins, target_nbins)
    1173             : !>      from the target's point of view (nimages = target_nimages).
    1174             : !> \par Mapping
    1175             : !>      The new distribution comprises of real bins and images within
    1176             : !>      bins. These can be view as target_nbins*nimages virtual
    1177             : !>      columns. These same virtual columns are also
    1178             : !>      source_nbins*multiplicity in number. Therefore these virtual
    1179             : !>      columns are mapped from source_nbins*multiplicity onto
    1180             : !>      target_bins*nimages (each target bin has nimages images):
    1181             : !>      Source 4: |1 2 3|4 5 6|7 8 9|A B C| (4*3)
    1182             : !>      Target 6: |1 2|3 4|5 6|7 8|9 A|B C| (6*2)
    1183             : !>      multiplicity=3, nimages=2, 12 virtual columns (1-C).
    1184             : !>      Source bin elements are evenly mapped into one of multiplicity
    1185             : !>      virtual columns. Other (non-even, block-size aware) mappings
    1186             : !>      could be better.
    1187             : ! **************************************************************************************************
    1188      448883 :    SUBROUTINE rebin_distribution(new_bins, images, source_bins, &
    1189             :                                  nbins, multiplicity, nimages)
    1190             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: new_bins, images
    1191             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: source_bins
    1192             :       INTEGER, INTENT(IN)                                :: nbins, multiplicity, nimages
    1193             : 
    1194             :       INTEGER                                            :: bin, i, old_nbins, virtual_bin
    1195      448883 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: bin_multiplier
    1196             : 
    1197             : !   ---------------------------------------------------------------------------
    1198             : 
    1199      448883 :       IF (MOD(nbins*nimages, multiplicity) .NE. 0) THEN
    1200           0 :          CPWARN("mulitplicity is not divisor of new process grid coordinate")
    1201             :       END IF
    1202      448883 :       old_nbins = (nbins*nimages)/multiplicity
    1203     1346649 :       ALLOCATE (bin_multiplier(0:old_nbins - 1))
    1204      897766 :       bin_multiplier(:) = 0
    1205     2133341 :       DO i = 1, SIZE(new_bins)
    1206     1684458 :          IF (i .LE. SIZE(source_bins)) THEN
    1207     1684458 :             bin = source_bins(i)
    1208             :          ELSE
    1209             :             ! Fill remainder with a cyclic distribution
    1210           0 :             bin = MOD(i, old_nbins)
    1211             :          END IF
    1212     1684458 :          virtual_bin = bin*multiplicity + bin_multiplier(bin)
    1213     1684458 :          new_bins(i) = virtual_bin/nimages
    1214     1684458 :          images(i) = 1 + MOD(virtual_bin, nimages)
    1215     1684458 :          bin_multiplier(bin) = bin_multiplier(bin) + 1
    1216     2133341 :          IF (bin_multiplier(bin) .GE. multiplicity) THEN
    1217      720646 :             bin_multiplier(bin) = 0
    1218             :          END IF
    1219             :       END DO
    1220      448883 :    END SUBROUTINE rebin_distribution
    1221             : 
    1222             : ! **************************************************************************************************
    1223             : !> \brief Creates a block-cyclic compatible distribution
    1224             : !>
    1225             : !>        All blocks in a dimension, except for possibly the last
    1226             : !>        block, have the same size.
    1227             : !> \param[out] dist           the elemental distribution
    1228             : !> \param[in] nrows           number of full rows
    1229             : !> \param[in] ncolumns        number of full columns
    1230             : !> \param[in] nrow_block      size of row blocks
    1231             : !> \param[in] ncol_block      size of column blocks
    1232             : !> \param[in] mp_env          multiprocess environment
    1233             : !> \param[out] row_blk_sizes  row block sizes
    1234             : !> \param[out] col_blk_sizes  column block sizes
    1235             : ! **************************************************************************************************
    1236     2052336 :    SUBROUTINE dbcsr_create_dist_block_cyclic(dist, nrows, ncolumns, &
    1237             :                                              nrow_block, ncol_block, group_handle, pgrid, row_blk_sizes, col_blk_sizes)
    1238             :       TYPE(dbcsr_distribution_type), INTENT(OUT)          :: dist
    1239             :       INTEGER, INTENT(IN)                                :: nrows, ncolumns, nrow_block, ncol_block
    1240             :       INTEGER, INTENT(IN)                                :: group_handle
    1241             :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1242             :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: row_blk_sizes, col_blk_sizes
    1243             : 
    1244             :       CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_dist_block_cyclic'
    1245             : 
    1246             :       INTEGER                                            :: nblkcols, nblkrows, npcols, nprows, &
    1247             :                                                             pdim, sz
    1248     2052336 :       INTEGER, DIMENSION(:), POINTER                     :: cd_data, rd_data
    1249             : 
    1250             :       ! Row sizes
    1251     2052336 :       IF (nrow_block .EQ. 0) THEN
    1252             :          nblkrows = 0
    1253             :          sz = 0
    1254             :       ELSE
    1255     2052336 :          nblkrows = nrows/nrow_block
    1256     2052336 :          sz = MOD(nrows, nrow_block)
    1257             :       END IF
    1258     2052336 :       IF (sz .GT. 0) nblkrows = nblkrows + 1
    1259    10260048 :       ALLOCATE (row_blk_sizes(nblkrows), rd_data(nblkrows))
    1260     6520436 :       row_blk_sizes = nrow_block
    1261     2052336 :       IF (sz .NE. 0) row_blk_sizes(nblkrows) = sz
    1262             : 
    1263             :       ! Column sizes
    1264     2052336 :       IF (ncol_block .EQ. 0) THEN
    1265             :          nblkcols = 0
    1266             :          sz = 0
    1267             :       ELSE
    1268     2052336 :          nblkcols = ncolumns/ncol_block
    1269     2052336 :          sz = MOD(ncolumns, ncol_block)
    1270             :       END IF
    1271     2052336 :       IF (sz .GT. 0) nblkcols = nblkcols + 1
    1272    10256334 :       ALLOCATE (col_blk_sizes(nblkcols), cd_data(nblkcols))
    1273     9093571 :       col_blk_sizes = ncol_block
    1274     2052336 :       IF (sz .NE. 0) col_blk_sizes(nblkcols) = sz
    1275             :       !
    1276             :       IF (debug_mod) THEN
    1277             :          WRITE (*, *) routineN//" nrows,nrow_block,nblkrows=", &
    1278             :             nrows, nrow_block, nblkrows
    1279             :          WRITE (*, *) routineN//" ncols,ncol_block,nblkcols=", &
    1280             :             ncolumns, ncol_block, nblkcols
    1281             :       END IF
    1282             :       ! Calculate process row distribution
    1283     2052336 :       nprows = SIZE(pgrid, 1)
    1284     5551672 :       DO pdim = 0, MIN(nprows - 1, nblkrows - 1)
    1285    10019772 :          rd_data(1 + pdim:nblkrows:nprows) = pdim
    1286             :       END DO
    1287             :       ! Calculate process column distribution
    1288     2052336 :       npcols = SIZE(pgrid, 2)
    1289     4102890 :       DO pdim = 0, MIN(npcols - 1, nblkcols - 1)
    1290    11144125 :          cd_data(1 + pdim:nblkcols:npcols) = pdim
    1291             :       END DO
    1292             :       !
    1293             :       IF (debug_mod) THEN
    1294             :          WRITE (*, *) routineN//" row_dist", &
    1295             :             rd_data
    1296             :          WRITE (*, *) routineN//" col_dist", &
    1297             :             cd_data
    1298             :       END IF
    1299             :       !
    1300             :       CALL dbcsr_distribution_new(dist, &
    1301             :                                   group=group_handle, pgrid=pgrid, &
    1302             :                                   row_dist=rd_data, &
    1303             :                                   col_dist=cd_data, &
    1304     2052336 :                                   reuse_arrays=.TRUE.)
    1305             : 
    1306     2052336 :    END SUBROUTINE dbcsr_create_dist_block_cyclic
    1307             : 
    1308             : ! **************************************************************************************************
    1309             : !> \brief   Allocate and initialize a real matrix 1-dimensional set.
    1310             : !> \param[in,out] matrix_set  Set containing the DBCSR matrices
    1311             : !> \param[in] nmatrix         Size of set
    1312             : !> \par History
    1313             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1314             : ! **************************************************************************************************
    1315      176341 :    SUBROUTINE allocate_dbcsr_matrix_set_1d(matrix_set, nmatrix)
    1316             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_set
    1317             :       INTEGER, INTENT(IN)                                :: nmatrix
    1318             : 
    1319             :       INTEGER                                            :: imatrix
    1320             : 
    1321      176341 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1322      827610 :       ALLOCATE (matrix_set(nmatrix))
    1323      474928 :       DO imatrix = 1, nmatrix
    1324      474928 :          NULLIFY (matrix_set(imatrix)%matrix)
    1325             :       END DO
    1326      176341 :    END SUBROUTINE allocate_dbcsr_matrix_set_1d
    1327             : 
    1328             : ! **************************************************************************************************
    1329             : !> \brief   Allocate and initialize a real matrix 2-dimensional set.
    1330             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1331             : !> \param[in] nmatrix         Size of set
    1332             : !> \param mmatrix ...
    1333             : !> \par History
    1334             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1335             : ! **************************************************************************************************
    1336      143052 :    SUBROUTINE allocate_dbcsr_matrix_set_2d(matrix_set, nmatrix, mmatrix)
    1337             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: matrix_set
    1338             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix
    1339             : 
    1340             :       INTEGER                                            :: imatrix, jmatrix
    1341             : 
    1342      143052 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1343     2163642 :       ALLOCATE (matrix_set(nmatrix, mmatrix))
    1344      883568 :       DO jmatrix = 1, mmatrix
    1345     1734486 :          DO imatrix = 1, nmatrix
    1346     1591434 :             NULLIFY (matrix_set(imatrix, jmatrix)%matrix)
    1347             :          END DO
    1348             :       END DO
    1349      143052 :    END SUBROUTINE allocate_dbcsr_matrix_set_2d
    1350             : 
    1351             : ! **************************************************************************************************
    1352             : !> \brief   Allocate and initialize a real matrix 3-dimensional set.
    1353             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1354             : !> \param[in] nmatrix         Size of set
    1355             : !> \param mmatrix ...
    1356             : !> \param pmatrix ...
    1357             : !> \par History
    1358             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1359             : ! **************************************************************************************************
    1360           0 :    SUBROUTINE allocate_dbcsr_matrix_set_3d(matrix_set, nmatrix, mmatrix, pmatrix)
    1361             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :), POINTER    :: matrix_set
    1362             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix
    1363             : 
    1364             :       INTEGER                                            :: imatrix, jmatrix, kmatrix
    1365             : 
    1366           0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1367           0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix))
    1368           0 :       DO kmatrix = 1, pmatrix
    1369           0 :          DO jmatrix = 1, mmatrix
    1370           0 :             DO imatrix = 1, nmatrix
    1371           0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix)%matrix)
    1372             :             END DO
    1373             :          END DO
    1374             :       END DO
    1375           0 :    END SUBROUTINE allocate_dbcsr_matrix_set_3d
    1376             : 
    1377             : ! **************************************************************************************************
    1378             : !> \brief   Allocate and initialize a real matrix 4-dimensional set.
    1379             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1380             : !> \param[in] nmatrix         Size of set
    1381             : !> \param mmatrix ...
    1382             : !> \param pmatrix ...
    1383             : !> \par History
    1384             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1385             : ! **************************************************************************************************
    1386           0 :    SUBROUTINE allocate_dbcsr_matrix_set_4d(matrix_set, nmatrix, mmatrix, pmatrix, qmatrix)
    1387             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :), POINTER    :: matrix_set
    1388             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix, qmatrix
    1389             : 
    1390             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix
    1391             : 
    1392           0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1393           0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix, qmatrix))
    1394           0 :       DO lmatrix = 1, qmatrix
    1395           0 :       DO kmatrix = 1, pmatrix
    1396           0 :          DO jmatrix = 1, mmatrix
    1397           0 :             DO imatrix = 1, nmatrix
    1398           0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix, lmatrix)%matrix)
    1399             :             END DO
    1400             :          END DO
    1401             :       END DO
    1402             :       END DO
    1403           0 :    END SUBROUTINE allocate_dbcsr_matrix_set_4d
    1404             : 
    1405             : ! **************************************************************************************************
    1406             : !> \brief   Allocate and initialize a real matrix 5-dimensional set.
    1407             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1408             : !> \param[in] nmatrix         Size of set
    1409             : !> \param mmatrix ...
    1410             : !> \param pmatrix ...
    1411             : !> \par History
    1412             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1413             : ! **************************************************************************************************
    1414           0 :    SUBROUTINE allocate_dbcsr_matrix_set_5d(matrix_set, nmatrix, mmatrix, pmatrix, qmatrix, smatrix)
    1415             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :, :), POINTER    :: matrix_set
    1416             :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix, qmatrix, smatrix
    1417             : 
    1418             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix, hmatrix
    1419             : 
    1420           0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1421           0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix, qmatrix, smatrix))
    1422           0 :       DO hmatrix = 1, smatrix
    1423           0 :       DO lmatrix = 1, qmatrix
    1424           0 :       DO kmatrix = 1, pmatrix
    1425           0 :          DO jmatrix = 1, mmatrix
    1426           0 :             DO imatrix = 1, nmatrix
    1427           0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix, lmatrix, hmatrix)%matrix)
    1428             :             END DO
    1429             :          END DO
    1430             :       END DO
    1431             :       END DO
    1432             :       END DO
    1433           0 :    END SUBROUTINE allocate_dbcsr_matrix_set_5d
    1434             : 
    1435             :    ! **************************************************************************************************
    1436             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1437             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1438             : !> \par History
    1439             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1440             : ! **************************************************************************************************
    1441      175152 :    SUBROUTINE deallocate_dbcsr_matrix_set_1d(matrix_set)
    1442             : 
    1443             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_set
    1444             : 
    1445             :       INTEGER                                            :: imatrix
    1446             : 
    1447      175152 :       IF (ASSOCIATED(matrix_set)) THEN
    1448      472356 :          DO imatrix = 1, SIZE(matrix_set)
    1449      472356 :             CALL dbcsr_deallocate_matrix(matrix_set(imatrix)%matrix)
    1450             :          END DO
    1451      173662 :          DEALLOCATE (matrix_set)
    1452             :       END IF
    1453             : 
    1454      175152 :    END SUBROUTINE deallocate_dbcsr_matrix_set_1d
    1455             : 
    1456             : ! **************************************************************************************************
    1457             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1458             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1459             : !> \par History
    1460             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1461             : ! **************************************************************************************************
    1462      147317 :    SUBROUTINE deallocate_dbcsr_matrix_set_2d(matrix_set)
    1463             : 
    1464             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: matrix_set
    1465             : 
    1466             :       INTEGER                                            :: imatrix, jmatrix
    1467             : 
    1468      147317 :       IF (ASSOCIATED(matrix_set)) THEN
    1469      889080 :          DO jmatrix = 1, SIZE(matrix_set, 2)
    1470     1745407 :             DO imatrix = 1, SIZE(matrix_set, 1)
    1471     1601092 :                CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix)%matrix)
    1472             :             END DO
    1473             :          END DO
    1474      144315 :          DEALLOCATE (matrix_set)
    1475             :       END IF
    1476      147317 :    END SUBROUTINE deallocate_dbcsr_matrix_set_2d
    1477             : 
    1478             : ! **************************************************************************************************
    1479             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1480             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1481             : !> \par History
    1482             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1483             : ! **************************************************************************************************
    1484           0 :    SUBROUTINE deallocate_dbcsr_matrix_set_3d(matrix_set)
    1485             : 
    1486             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :), POINTER    :: matrix_set
    1487             : 
    1488             :       INTEGER                                            :: imatrix, jmatrix, kmatrix
    1489             : 
    1490           0 :       IF (ASSOCIATED(matrix_set)) THEN
    1491           0 :          DO kmatrix = 1, SIZE(matrix_set, 3)
    1492           0 :             DO jmatrix = 1, SIZE(matrix_set, 2)
    1493           0 :                DO imatrix = 1, SIZE(matrix_set, 1)
    1494           0 :                   CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix)%matrix)
    1495             :                END DO
    1496             :             END DO
    1497             :          END DO
    1498           0 :          DEALLOCATE (matrix_set)
    1499             :       END IF
    1500           0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_3d
    1501             : 
    1502             : ! **************************************************************************************************
    1503             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1504             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1505             : !> \par History
    1506             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1507             : ! **************************************************************************************************
    1508           0 :    SUBROUTINE deallocate_dbcsr_matrix_set_4d(matrix_set)
    1509             : 
    1510             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :), POINTER    :: matrix_set
    1511             : 
    1512             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix
    1513             : 
    1514           0 :       IF (ASSOCIATED(matrix_set)) THEN
    1515           0 :          DO lmatrix = 1, SIZE(matrix_set, 4)
    1516           0 :          DO kmatrix = 1, SIZE(matrix_set, 3)
    1517           0 :             DO jmatrix = 1, SIZE(matrix_set, 2)
    1518           0 :                DO imatrix = 1, SIZE(matrix_set, 1)
    1519           0 :                   CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix, lmatrix)%matrix)
    1520             :                END DO
    1521             :             END DO
    1522             :          END DO
    1523             :          END DO
    1524           0 :          DEALLOCATE (matrix_set)
    1525             :       END IF
    1526           0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_4d
    1527             : 
    1528             : ! **************************************************************************************************
    1529             : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1530             : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1531             : !> \par History
    1532             : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1533             : ! **************************************************************************************************
    1534           0 :    SUBROUTINE deallocate_dbcsr_matrix_set_5d(matrix_set)
    1535             : 
    1536             :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :, :), POINTER    :: matrix_set
    1537             : 
    1538             :       INTEGER                                            :: imatrix, jmatrix, kmatrix, hmatrix, lmatrix
    1539             : 
    1540           0 :       IF (ASSOCIATED(matrix_set)) THEN
    1541           0 :          DO hmatrix = 1, SIZE(matrix_set, 5)
    1542           0 :             DO lmatrix = 1, SIZE(matrix_set, 4)
    1543           0 :             DO kmatrix = 1, SIZE(matrix_set, 3)
    1544           0 :                DO jmatrix = 1, SIZE(matrix_set, 2)
    1545           0 :                   DO imatrix = 1, SIZE(matrix_set, 1)
    1546           0 :                      CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix, lmatrix, hmatrix)%matrix)
    1547             :                   END DO
    1548             :                END DO
    1549             :             END DO
    1550             :             END DO
    1551             :          END DO
    1552           0 :          DEALLOCATE (matrix_set)
    1553             :       END IF
    1554           0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_5d
    1555             : 
    1556             : END MODULE cp_dbcsr_operations

Generated by: LCOV version 1.15