LCOV - code coverage report
Current view: top level - src - mp2_ri_grad_util.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b8e0b09) Lines: 744 756 98.4 %
Date: 2024-08-31 06:31:37 Functions: 7 9 77.8 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines for calculating RI-MP2 gradients
      10             : !> \par History
      11             : !>      10.2013 created [Mauro Del Ben]
      12             : ! **************************************************************************************************
      13             : MODULE mp2_ri_grad_util
      14             : !
      15             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
      16             :                                               cp_blacs_env_release,&
      17             :                                               cp_blacs_env_type
      18             :    USE cp_dbcsr_api,                    ONLY: dbcsr_p_type,&
      19             :                                               dbcsr_type,&
      20             :                                               dbcsr_type_no_symmetry
      21             :    USE cp_dbcsr_operations,             ONLY: copy_fm_to_dbcsr,&
      22             :                                               cp_dbcsr_m_by_n_from_template
      23             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_geadd
      24             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      25             :                                               cp_fm_struct_get,&
      26             :                                               cp_fm_struct_release,&
      27             :                                               cp_fm_struct_type
      28             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      29             :                                               cp_fm_get_info,&
      30             :                                               cp_fm_release,&
      31             :                                               cp_fm_set_all,&
      32             :                                               cp_fm_to_fm,&
      33             :                                               cp_fm_type
      34             :    USE group_dist_types,                ONLY: create_group_dist,&
      35             :                                               get_group_dist,&
      36             :                                               group_dist_d1_type,&
      37             :                                               release_group_dist
      38             :    USE kinds,                           ONLY: dp
      39             :    USE libint_2c_3c,                    ONLY: compare_potential_types
      40             :    USE message_passing,                 ONLY: mp_para_env_type,&
      41             :                                               mp_request_null,&
      42             :                                               mp_request_type,&
      43             :                                               mp_waitall
      44             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      45             :                                               mp2_type
      46             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      47             :    USE util,                            ONLY: get_limit
      48             : #include "./base/base_uses.f90"
      49             : 
      50             :    IMPLICIT NONE
      51             : 
      52             :    PRIVATE
      53             : 
      54             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'
      55             : 
      56             :    PUBLIC :: complete_gamma, array2fm, fm2array, prepare_redistribution, create_dbcsr_gamma
      57             : 
      58             :    TYPE index_map
      59             :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
      60             :    END TYPE
      61             : 
      62             : CONTAINS
      63             : 
      64             : ! **************************************************************************************************
      65             : !> \brief complete the calculation of the Gamma matrices
      66             : !> \param mp2_env ...
      67             : !> \param B_ia_Q ...
      68             : !> \param dimen_RI ...
      69             : !> \param homo ...
      70             : !> \param virtual ...
      71             : !> \param para_env ...
      72             : !> \param para_env_sub ...
      73             : !> \param ngroup ...
      74             : !> \param my_group_L_size ...
      75             : !> \param my_group_L_start ...
      76             : !> \param my_group_L_end ...
      77             : !> \param my_B_size ...
      78             : !> \param my_B_virtual_start ...
      79             : !> \param gd_array ...
      80             : !> \param gd_B_virtual ...
      81             : !> \param kspin ...
      82             : !> \author Mauro Del Ben
      83             : ! **************************************************************************************************
      84         296 :    SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
      85             :                              my_group_L_size, my_group_L_start, my_group_L_end, &
      86             :                              my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
      87             : 
      88             :       TYPE(mp2_type)                                     :: mp2_env
      89             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
      90             :          INTENT(INOUT)                                   :: B_ia_Q
      91             :       INTEGER, INTENT(IN)                                :: dimen_RI, homo, virtual
      92             :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env, para_env_sub
      93             :       INTEGER, INTENT(IN)                                :: ngroup, my_group_L_size, &
      94             :                                                             my_group_L_start, my_group_L_end, &
      95             :                                                             my_B_size, my_B_virtual_start
      96             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_array, gd_B_virtual
      97             :       INTEGER, INTENT(IN)                                :: kspin
      98             : 
      99             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'complete_gamma'
     100             : 
     101             :       INTEGER :: dimen_ia, handle, i, ispin, kkB, my_ia_end, my_ia_size, my_ia_start, my_P_end, &
     102             :          my_P_size, my_P_start, nspins, pos_group, pos_sub
     103         296 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: pos_info
     104         296 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: group_grid_2_mepos, mepos_2_grid_group
     105         296 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: BIb_C_2D, Gamma_2D, Gamma_PQ
     106             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     107             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_ia, fm_struct_RI
     108             :       TYPE(cp_fm_type) :: fm_Gamma, fm_Gamma_PQ, fm_Gamma_PQ_2, fm_Gamma_PQ_temp, &
     109             :          fm_Gamma_PQ_temp_2, fm_ia_P, fm_Y, operator_half, PQ_half
     110         296 :       TYPE(group_dist_d1_type)                           :: gd_array_new, gd_ia, gd_ia_new, gd_P, &
     111         296 :                                                             gd_P_new
     112             : 
     113         296 :       CALL timeset(routineN, handle)
     114             : 
     115             :       ! reshape the data into a 2D array
     116             :       ! reorder the local data in such a way to help the next stage of matrix creation
     117             :       ! now the data inside the group are divided into a ia x K matrix
     118         296 :       dimen_ia = homo*virtual
     119         296 :       CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
     120         296 :       CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
     121             : 
     122             :       CALL mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     123             :                         my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     124             : 
     125             :       CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, Gamma_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     126         296 :                               my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     127             : 
     128             :       ! create the processor map and size arrays
     129         296 :       CALL create_group_dist(gd_ia_new, para_env%num_pe)
     130         296 :       CALL create_group_dist(gd_array_new, para_env%num_pe)
     131         296 :       CALL create_group_dist(gd_P, para_env_sub%num_pe, dimen_RI)
     132         296 :       CALL create_group_dist(gd_P_new, para_env%num_pe)
     133             : 
     134         296 :       CALL get_group_dist(gd_P, para_env_sub%mepos, my_P_start, my_P_end, my_P_size)
     135             : 
     136             :       CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
     137             :                                   group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
     138             : 
     139         888 :       DO i = 0, para_env%num_pe - 1
     140             :          ! calculate position of the group
     141         592 :          pos_group = i/para_env_sub%num_pe
     142             :          ! calculate position in the subgroup
     143         592 :          pos_sub = pos_info(i)
     144             :          ! 1 -> rows, 2 -> cols
     145         592 :          CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
     146         592 :          CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
     147         888 :          CALL get_group_dist(gd_P, pos_sub, gd_P_new, i)
     148             :       END DO
     149             : 
     150             :       ! create the blacs env
     151         296 :       NULLIFY (blacs_env)
     152         296 :       CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)
     153             : 
     154         296 :       NULLIFY (fm_struct_ia)
     155             :       CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
     156         296 :                                ncol_global=dimen_RI, para_env=para_env)
     157             : 
     158             :       ! create the fm matrix Gamma
     159             :       CALL array2fm(Gamma_2D, fm_struct_ia, my_ia_start, my_ia_end, &
     160             :                     my_group_L_start, my_group_L_end, &
     161             :                     gd_ia_new, gd_array_new, &
     162             :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     163         296 :                     fm_Y)
     164             :       ! create the fm matrix B_ia_P
     165             :       CALL array2fm(BIb_C_2D, fm_struct_ia, my_ia_start, my_ia_end, &
     166             :                     my_group_L_start, my_group_L_end, &
     167             :                     gd_ia_new, gd_array_new, &
     168             :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     169         296 :                     fm_ia_P)
     170             : 
     171         296 :       NULLIFY (fm_struct_RI)
     172             :       CALL cp_fm_struct_create(fm_struct_RI, context=blacs_env, nrow_global=dimen_RI, &
     173         296 :                                ncol_global=dimen_RI, para_env=para_env)
     174             : 
     175             :       ! since we will need (P|Q)^(-1/2) in the future, make a copy
     176             :       CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_RI, my_P_start, my_P_end, &
     177             :                     my_group_L_start, my_group_L_end, &
     178             :                     gd_P_new, gd_array_new, &
     179             :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     180         296 :                     PQ_half, do_release_mat=.FALSE.)
     181             : 
     182         296 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     183             :          CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_RI, my_P_start, my_P_end, &
     184             :                        my_group_L_start, my_group_L_end, &
     185             :                        gd_P_new, gd_array_new, &
     186             :                        group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     187           8 :                        operator_half, do_release_mat=.FALSE.)
     188             :       END IF
     189             : 
     190         296 :       CALL release_group_dist(gd_P_new)
     191         296 :       CALL release_group_dist(gd_ia_new)
     192         296 :       CALL release_group_dist(gd_array_new)
     193             : 
     194             :       ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
     195         296 :       IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     196         288 :          CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
     197         288 :          CALL cp_fm_struct_release(fm_struct_ia)
     198             :          ! perform the matrix multiplication
     199             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     200             :                             matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
     201         288 :                             matrix_c=fm_Gamma)
     202             :          ! release the Y matrix
     203         288 :          CALL cp_fm_release(fm_Y)
     204             : 
     205             :          ! complete gamma small (fm_Gamma_PQ)
     206             :          ! create temp matrix
     207         288 :          CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
     208             :          CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
     209             :                             matrix_a=fm_Gamma, matrix_b=fm_ia_P, beta=0.0_dp, &
     210         288 :                             matrix_c=fm_Gamma_PQ_temp)
     211         288 :          CALL cp_fm_release(fm_ia_P)
     212             :          ! create fm_Gamma_PQ matrix
     213         288 :          CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI, name="fm_Gamma_PQ")
     214         288 :          CALL cp_fm_struct_release(fm_struct_RI)
     215             :          ! perform matrix multiplication
     216             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     217             :                             matrix_a=fm_Gamma_PQ_temp, matrix_b=PQ_half, beta=0.0_dp, &
     218         288 :                             matrix_c=fm_Gamma_PQ)
     219         288 :          CALL cp_fm_release(fm_Gamma_PQ_temp)
     220         288 :          CALL cp_fm_release(PQ_half)
     221             : 
     222             :       ELSE
     223             :          ! create temp matrix
     224           8 :          CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
     225             :          CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
     226             :                             matrix_a=fm_Y, matrix_b=fm_ia_P, beta=0.0_dp, &
     227           8 :                             matrix_c=fm_Gamma_PQ_temp)
     228           8 :          CALL cp_fm_release(fm_ia_P)
     229             : 
     230           8 :          CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
     231           8 :          CALL cp_fm_struct_release(fm_struct_ia)
     232             :          ! perform the matrix multiplication
     233             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     234             :                             matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
     235           8 :                             matrix_c=fm_Gamma)
     236           8 :          CALL cp_fm_release(fm_Y)
     237             : 
     238           8 :          CALL cp_fm_create(fm_Gamma_PQ_temp_2, fm_struct_RI, name="fm_Gamma_PQ_temp_2")
     239             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     240             :                             matrix_a=fm_Gamma_PQ_temp, matrix_b=operator_half, beta=0.0_dp, &
     241           8 :                             matrix_c=fm_Gamma_PQ_temp_2)
     242             : 
     243           8 :          CALL cp_fm_create(fm_Gamma_PQ_2, fm_struct_RI, name="fm_Gamma_PQ_2")
     244             :          CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     245             :                             matrix_a=PQ_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
     246           8 :                             matrix_c=fm_Gamma_PQ_temp)
     247           8 :          CALL cp_fm_to_fm(fm_Gamma_PQ_temp, fm_Gamma_PQ_2)
     248           8 :          CALL cp_fm_geadd(1.0_dp, "T", fm_Gamma_PQ_temp, 1.0_dp, fm_Gamma_PQ_2)
     249           8 :          CALL cp_fm_release(fm_Gamma_PQ_temp)
     250           8 :          CALL cp_fm_release(PQ_half)
     251             : 
     252           8 :          CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI)
     253           8 :          CALL cp_fm_struct_release(fm_struct_RI)
     254             :          CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=-1.0_dp, &
     255             :                             matrix_a=operator_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
     256           8 :                             matrix_c=fm_Gamma_PQ)
     257           8 :          CALL cp_fm_release(operator_half)
     258           8 :          CALL cp_fm_release(fm_Gamma_PQ_temp_2)
     259             :       END IF
     260             : 
     261             :       ! now redistribute the data, in this case we go back
     262             :       ! to the original way the integrals were distributed
     263             :       CALL fm2array(Gamma_2D, my_ia_size, my_ia_start, my_ia_end, &
     264             :                     my_group_L_size, my_group_L_start, my_group_L_end, &
     265             :                     group_grid_2_mepos, mepos_2_grid_group, &
     266             :                     para_env_sub%num_pe, ngroup, &
     267         296 :                     fm_Gamma)
     268             : 
     269        1184 :       ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
     270             :       CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
     271             :                     my_group_L_size, my_group_L_start, my_group_L_end, &
     272             :                     group_grid_2_mepos, mepos_2_grid_group, &
     273             :                     para_env_sub%num_pe, ngroup, &
     274         296 :                     fm_Gamma_PQ)
     275         296 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
     276         220 :          CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ)
     277             :       ELSE
     278      303670 :          mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + Gamma_PQ
     279          76 :          DEALLOCATE (Gamma_PQ)
     280             :       END IF
     281             : 
     282         296 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     283          24 :          ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
     284             :          CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
     285             :                        my_group_L_size, my_group_L_start, my_group_L_end, &
     286             :                        group_grid_2_mepos, mepos_2_grid_group, &
     287             :                        para_env_sub%num_pe, ngroup, &
     288           8 :                        fm_Gamma_PQ_2)
     289           8 :          IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
     290           8 :             CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ_2)
     291             :          ELSE
     292           0 :             mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + Gamma_PQ
     293           0 :             DEALLOCATE (Gamma_PQ)
     294             :          END IF
     295             :       END IF
     296             : 
     297             :       ! allocate G_P_ia (DBCSR)
     298         296 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
     299         220 :          nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
     300       13971 :          ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_L_size, nspins))
     301         516 :          DO ispin = 1, nspins
     302       13311 :          DO kkB = 1, my_group_L_size
     303       13091 :             NULLIFY (mp2_env%ri_grad%G_P_ia(kkB, ispin)%matrix)
     304             :          END DO
     305             :          END DO
     306             :       END IF
     307             : 
     308             :       ! create the Gamma_ia_P in DBCSR style
     309             :       CALL create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
     310             :                               my_ia_start, my_ia_end, my_group_L_size, gd_ia, &
     311         296 :                               mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)
     312             : 
     313         296 :       DEALLOCATE (pos_info)
     314         296 :       DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
     315         296 :       CALL release_group_dist(gd_ia)
     316         296 :       CALL release_group_dist(gd_P)
     317             : 
     318             :       ! release blacs_env
     319         296 :       CALL cp_blacs_env_release(blacs_env)
     320             : 
     321         296 :       CALL timestop(handle)
     322             : 
     323        2072 :    END SUBROUTINE complete_gamma
     324             : 
     325             : ! **************************************************************************************************
     326             : !> \brief Redistribute a 3d matrix to a 2d matrix
     327             : !> \param B_ia_Q ...
     328             : !> \param BIb_C_2D ...
     329             : !> \param homo ...
     330             : !> \param my_B_size ...
     331             : !> \param virtual ...
     332             : !> \param my_B_virtual_start ...
     333             : !> \param my_ia_start ...
     334             : !> \param my_ia_end ...
     335             : !> \param my_ia_size ...
     336             : !> \param my_group_L_size ...
     337             : !> \param para_env_sub ...
     338             : !> \param gd_B_virtual ...
     339             : ! **************************************************************************************************
     340         296 :    SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     341             :                            my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     342             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
     343             :          INTENT(INOUT)                                   :: B_ia_Q
     344             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     345             :          INTENT(OUT)                                     :: BIb_C_2D
     346             :       INTEGER, INTENT(IN)                                :: homo, my_B_size, virtual, &
     347             :                                                             my_B_virtual_start, my_ia_start, &
     348             :                                                             my_ia_end, my_ia_size, my_group_L_size
     349             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env_sub
     350             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
     351             : 
     352             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mat_3d_to_2d'
     353             : 
     354             :       INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
     355             :          rec_B_virtual_end, rec_B_virtual_start
     356         296 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_C_rec
     357             : 
     358         296 :       CALL timeset(routineN, handle)
     359             : 
     360        1184 :       ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
     361      705779 :       BIb_C_2D = 0.0_dp
     362             : 
     363        1360 :       DO iiB = 1, homo
     364       16940 :          DO jjB = 1, my_B_size
     365       15580 :             ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
     366       16644 :             IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     367      690895 :                BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(iiB, jjB, 1:my_group_L_size)
     368             :             END IF
     369             :          END DO
     370             :       END DO
     371             : 
     372         304 :       DO proc_shift = 1, para_env_sub%num_pe - 1
     373           8 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     374           8 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     375             : 
     376           8 :          CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
     377             : 
     378          40 :          ALLOCATE (BIb_C_rec(homo, rec_B_size, my_group_L_size))
     379       47130 :          BIb_C_rec = 0.0_dp
     380             : 
     381             :          CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
     382           8 :                                     BIb_C_rec, proc_receive)
     383             : 
     384          48 :          DO iiB = 1, homo
     385         438 :             DO jjB = 1, rec_B_size
     386         390 :                ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
     387         430 :                IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     388       17373 :                   BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(iiB, jjB, 1:my_group_L_size)
     389             :                END IF
     390             :             END DO
     391             :          END DO
     392             : 
     393         312 :          DEALLOCATE (BIb_C_rec)
     394             :       END DO
     395         296 :       DEALLOCATE (B_ia_Q)
     396             : 
     397         296 :       CALL timestop(handle)
     398         592 :    END SUBROUTINE mat_3d_to_2d
     399             : 
     400             : ! **************************************************************************************************
     401             : !> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia to account for a different order of indices
     402             : !> \param B_ia_Q ...
     403             : !> \param BIb_C_2D ...
     404             : !> \param homo ...
     405             : !> \param my_B_size ...
     406             : !> \param virtual ...
     407             : !> \param my_B_virtual_start ...
     408             : !> \param my_ia_start ...
     409             : !> \param my_ia_end ...
     410             : !> \param my_ia_size ...
     411             : !> \param my_group_L_size ...
     412             : !> \param para_env_sub ...
     413             : !> \param gd_B_virtual ...
     414             : ! **************************************************************************************************
     415         296 :    SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     416             :                                  my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     417             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
     418             :          INTENT(INOUT)                                   :: B_ia_Q
     419             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     420             :          INTENT(OUT)                                     :: BIb_C_2D
     421             :       INTEGER, INTENT(IN)                                :: homo, my_B_size, virtual, &
     422             :                                                             my_B_virtual_start, my_ia_start, &
     423             :                                                             my_ia_end, my_ia_size, my_group_L_size
     424             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env_sub
     425             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
     426             : 
     427             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d_gamma'
     428             : 
     429             :       INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
     430             :          rec_B_virtual_end, rec_B_virtual_start
     431         296 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_C_rec
     432             : 
     433         296 :       CALL timeset(routineN, handle)
     434             : 
     435        1184 :       ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
     436      705779 :       BIb_C_2D = 0.0_dp
     437             : 
     438        1360 :       DO iiB = 1, homo
     439       16940 :          DO jjB = 1, my_B_size
     440       15580 :             ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
     441       16644 :             IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     442      690895 :                BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(jjB, iiB, 1:my_group_L_size)
     443             :             END IF
     444             :          END DO
     445             :       END DO
     446             : 
     447         304 :       DO proc_shift = 1, para_env_sub%num_pe - 1
     448           8 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     449           8 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     450             : 
     451           8 :          CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
     452             : 
     453          40 :          ALLOCATE (BIb_C_rec(rec_B_size, homo, my_group_L_size))
     454       43544 :          BIb_C_rec = 0.0_dp
     455             : 
     456             :          CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
     457           8 :                                     BIb_C_rec, proc_receive)
     458             : 
     459          48 :          DO iiB = 1, homo
     460         438 :             DO jjB = 1, rec_B_size
     461         390 :                ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
     462         430 :                IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     463       17373 :                   BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(jjB, iiB, 1:my_group_L_size)
     464             :                END IF
     465             :             END DO
     466             :          END DO
     467             : 
     468         312 :          DEALLOCATE (BIb_C_rec)
     469             :       END DO
     470         296 :       DEALLOCATE (B_ia_Q)
     471             : 
     472         296 :       CALL timestop(handle)
     473         592 :    END SUBROUTINE mat_3d_to_2d_gamma
     474             : 
     475             : ! **************************************************************************************************
     476             : !> \brief redistribute local part of array to fm
     477             : !> \param mat2D ...
     478             : !> \param fm_struct ...
     479             : !> \param my_start_row ...
     480             : !> \param my_end_row ...
     481             : !> \param my_start_col ...
     482             : !> \param my_end_col ...
     483             : !> \param gd_row ...
     484             : !> \param gd_col ...
     485             : !> \param group_grid_2_mepos ...
     486             : !> \param ngroup_row ...
     487             : !> \param ngroup_col ...
     488             : !> \param fm_mat ...
     489             : !> \param integ_group_size ...
     490             : !> \param color_group ...
     491             : !> \param do_release_mat whether to release the array (default: yes)
     492             : ! **************************************************************************************************
     493        1188 :    SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
     494             :                        my_start_col, my_end_col, &
     495             :                        gd_row, gd_col, &
     496             :                        group_grid_2_mepos, ngroup_row, ngroup_col, &
     497             :                        fm_mat, integ_group_size, color_group, do_release_mat)
     498             : 
     499             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     500             :          INTENT(INOUT)                                   :: mat2D
     501             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     502             :       INTEGER, INTENT(IN)                                :: my_start_row, my_end_row, my_start_col, &
     503             :                                                             my_end_col
     504             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_row, gd_col
     505             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN)  :: group_grid_2_mepos
     506             :       INTEGER, INTENT(IN)                                :: ngroup_row, ngroup_col
     507             :       TYPE(cp_fm_type), INTENT(OUT)                      :: fm_mat
     508             :       INTEGER, INTENT(IN), OPTIONAL                      :: integ_group_size, color_group
     509             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_release_mat
     510             : 
     511             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'array2fm'
     512             : 
     513             :       INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
     514             :          i_sub, iiB, iii, itmp(2), j_global, j_local, j_sub, jjB, my_num_col_blocks, &
     515             :          my_num_row_blocks, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, num_cols, &
     516             :          num_rec_cols, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
     517             :          proc_shift, rec_col_end, rec_col_size, rec_col_start, rec_counter, rec_pcol, rec_prow, &
     518             :          rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, ref_send_prow, send_counter, &
     519             :          send_pcol, send_prow, size_rec_buffer, size_send_buffer, start_col_block, start_row_block
     520        1188 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, index_col_rec, map_rec_size, &
     521        1188 :                                                             map_send_size
     522        1188 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: blocks_ranges_col, blocks_ranges_row, &
     523        1188 :                                                             grid_2_mepos, grid_ref_2_send_pos, &
     524        1188 :                                                             mepos_2_grid
     525        1188 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     526             :       LOGICAL                                            :: convert_pos, my_do_release_mat
     527             :       REAL(KIND=dp)                                      :: part_col, part_row
     528             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     529        1188 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     530             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     531        1188 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     532             : 
     533        1188 :       CALL timeset(routineN, handle)
     534             : 
     535        1188 :       my_do_release_mat = .TRUE.
     536        1188 :       IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat
     537             : 
     538        1188 :       CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)
     539             : 
     540             :       ! create the full matrix, (num_rows x num_cols)
     541        1188 :       CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
     542        1188 :       CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)
     543             : 
     544             :       ! start filling procedure
     545             :       ! fill the matrix
     546             :       CALL cp_fm_get_info(matrix=fm_mat, &
     547             :                           nrow_local=nrow_local, &
     548             :                           ncol_local=ncol_local, &
     549             :                           row_indices=row_indices, &
     550        1188 :                           col_indices=col_indices)
     551        1188 :       myprow = fm_mat%matrix_struct%context%mepos(1)
     552        1188 :       mypcol = fm_mat%matrix_struct%context%mepos(2)
     553        1188 :       nprow = fm_mat%matrix_struct%context%num_pe(1)
     554        1188 :       npcol = fm_mat%matrix_struct%context%num_pe(2)
     555             : 
     556             :       ! start the communication part
     557             :       ! 0) create array containing the processes position
     558             :       !    and supporting infos
     559        1188 :       CALL timeset(routineN//"_info", handle2)
     560        4752 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     561        4622 :       grid_2_mepos = 0
     562        3564 :       ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
     563        1188 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     564             :       ! sum infos
     565        1188 :       CALL para_env%sum(grid_2_mepos)
     566        3564 :       CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
     567             : 
     568             :       ! 1) loop over my local data and define a map for the proc to send data
     569        3564 :       ALLOCATE (map_send_size(0:para_env%num_pe - 1))
     570        3434 :       map_send_size = 0
     571        1188 :       dummy_proc = 0
     572       73399 :       DO jjB = my_start_col, my_end_col
     573       72211 :          send_pcol = fm_mat%matrix_struct%g2p_col(jjB)
     574     4027804 :          DO iiB = my_start_row, my_end_row
     575     3954405 :             send_prow = fm_mat%matrix_struct%g2p_row(iiB)
     576     3954405 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     577     4026616 :             map_send_size(proc_send) = map_send_size(proc_send) + 1
     578             :          END DO
     579             :       END DO
     580             : 
     581             :       ! 2) loop over my local data of fm_mat and define a map for the proc from which rec data
     582        3564 :       ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
     583        3434 :       map_rec_size = 0
     584        1188 :       part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
     585        1188 :       part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
     586             : 
     587             :       ! In some cases we have to convert global positions to positions in a subgroup (RPA)
     588        1188 :       convert_pos = .FALSE.
     589        1188 :       IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .TRUE.
     590             : 
     591       47035 :       DO jjB = 1, nrow_local
     592       45847 :          j_global = row_indices(jjB)
     593             :          ! check the group holding this element
     594             :          ! dirty way, if someone has a better idea ...
     595       45847 :          rec_prow = INT(REAL(j_global - 1, KIND=dp)/part_row)
     596       45847 :          rec_prow = MAX(0, rec_prow)
     597       45847 :          rec_prow = MIN(rec_prow, ngroup_row - 1)
     598             :          DO
     599       45847 :             itmp = get_limit(num_rows, ngroup_row, rec_prow)
     600       45847 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
     601           0 :             IF (j_global < itmp(1)) rec_prow = rec_prow - 1
     602       45847 :             IF (j_global > itmp(2)) rec_prow = rec_prow + 1
     603             :          END DO
     604             : 
     605       45847 :          IF (convert_pos) THEN
     606             :             ! if the group is not in the same RPA group cycle
     607       15254 :             IF ((rec_prow/integ_group_size) .NE. color_group) CYCLE
     608             :             ! convert global position to position into the RPA group
     609       10102 :             rec_prow = MOD(rec_prow, integ_group_size)
     610             :          END IF
     611             : 
     612     3996288 :          DO iiB = 1, ncol_local
     613     3954405 :             i_global = col_indices(iiB)
     614             :             ! check the process in the group holding this element
     615     3954405 :             rec_pcol = INT(REAL(i_global - 1, KIND=dp)/part_col)
     616     3954405 :             rec_pcol = MAX(0, rec_pcol)
     617     3954405 :             rec_pcol = MIN(rec_pcol, ngroup_col - 1)
     618             :             DO
     619     3954405 :                itmp = get_limit(num_cols, ngroup_col, rec_pcol)
     620     3954405 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
     621           0 :                IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
     622     3954405 :                IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
     623             :             END DO
     624             : 
     625     3954405 :             proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)
     626             : 
     627     4000252 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
     628             : 
     629             :          END DO ! i_global
     630             :       END DO ! j_global
     631             : 
     632             :       ! 3) check if the local data has to be stored in the new fm_mat
     633        1188 :       IF (map_rec_size(para_env%mepos) > 0) THEN
     634      111728 :          DO jjB = 1, ncol_local
     635      110542 :             j_global = col_indices(jjB)
     636      111728 :             IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
     637     3419375 :                DO iiB = 1, nrow_local
     638     3347164 :                   i_global = row_indices(iiB)
     639     3419375 :                   IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
     640     2598313 :                      fm_mat%local_data(iiB, jjB) = mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1)
     641             :                   END IF
     642             :                END DO
     643             :             END IF
     644             :          END DO
     645             :       END IF
     646        1188 :       CALL timestop(handle2)
     647             : 
     648             :       ! 4) reorder data in the send_buffer
     649        1188 :       CALL timeset(routineN//"_buffer_send", handle2)
     650             :       ! count the number of messages to send
     651        1188 :       number_of_send = 0
     652        2246 :       DO proc_shift = 1, para_env%num_pe - 1
     653        1058 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     654        2246 :          IF (map_send_size(proc_send) > 0) THEN
     655         974 :             number_of_send = number_of_send + 1
     656             :          END IF
     657             :       END DO
     658             :       ! allocate the structure that will hold the messages to be sent
     659        4324 :       ALLOCATE (buffer_send(number_of_send))
     660             :       ! grid_ref_2_send_pos is an array (map) that given a pair
     661             :       ! (ref_send_prow,ref_send_pcol) returns
     662             :       ! the position in the buffer_send associated to that process
     663        3564 :       ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
     664        4622 :       grid_ref_2_send_pos = 0
     665             :       ! finalize the allocation of buffer_send with the actual size
     666             :       ! of each message (actual size is size_send_buffer)
     667        1188 :       send_counter = 0
     668        2246 :       DO proc_shift = 1, para_env%num_pe - 1
     669        1058 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     670        1058 :          size_send_buffer = map_send_size(proc_send)
     671        2246 :          IF (map_send_size(proc_send) > 0) THEN
     672         974 :             send_counter = send_counter + 1
     673             :             ! allocate the sending buffer (msg)
     674        2922 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
     675     1357066 :             buffer_send(send_counter)%msg = 0.0_dp
     676         974 :             buffer_send(send_counter)%proc = proc_send
     677             :             ! get the pointer to prow, pcol of the process that has
     678             :             ! to receive this message
     679         974 :             ref_send_prow = mepos_2_grid(1, proc_send)
     680         974 :             ref_send_pcol = mepos_2_grid(2, proc_send)
     681             :             ! save the rank of the process that has to receive this message
     682         974 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
     683             :          END IF
     684             :       END DO
     685             : 
     686             :       ! loop over the locally held data and fill the buffer_send
     687             :       ! for doing that we need an array that keep track if the
     688             :       ! sequential increase of the index for each message
     689        3350 :       ALLOCATE (iii_vet(number_of_send))
     690        2162 :       iii_vet = 0
     691       71393 :       DO iiB = my_start_row, my_end_row
     692       70205 :          send_prow = fm_mat%matrix_struct%g2p_row(iiB)
     693     4025798 :          DO jjB = my_start_col, my_end_col
     694     3954405 :             send_pcol = fm_mat%matrix_struct%g2p_col(jjB)
     695             :             ! we don't need to send to ourselves
     696     3954405 :             IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
     697             : 
     698     1356092 :             send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
     699     1356092 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     700     1356092 :             iii = iii_vet(send_counter)
     701     4024610 :             buffer_send(send_counter)%msg(iii) = mat2D(iiB - my_start_row + 1, jjB - my_start_col + 1)
     702             :          END DO
     703             :       END DO
     704             : 
     705        1188 :       DEALLOCATE (iii_vet)
     706        1188 :       DEALLOCATE (grid_ref_2_send_pos)
     707        1188 :       IF (my_do_release_mat) DEALLOCATE (mat2D)
     708        1188 :       CALL timestop(handle2)
     709             : 
     710             :       ! 5) similarly to what done for the buffer_send
     711             :       !    create the buffer for receive, post the message with irecv
     712             :       !    and send the messages non-blocking
     713        1188 :       CALL timeset(routineN//"_isendrecv", handle2)
     714             :       ! count the number of messages to be received
     715        1188 :       number_of_rec = 0
     716        2246 :       DO proc_shift = 1, para_env%num_pe - 1
     717        1058 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     718        2246 :          IF (map_rec_size(proc_receive) > 0) THEN
     719         974 :             number_of_rec = number_of_rec + 1
     720             :          END IF
     721             :       END DO
     722             : 
     723        4324 :       ALLOCATE (buffer_rec(number_of_rec))
     724             : 
     725        1188 :       rec_counter = 0
     726        2246 :       DO proc_shift = 1, para_env%num_pe - 1
     727        1058 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     728        1058 :          size_rec_buffer = map_rec_size(proc_receive)
     729        2246 :          IF (map_rec_size(proc_receive) > 0) THEN
     730         974 :             rec_counter = rec_counter + 1
     731             :             ! prepare the buffer for receive
     732        2922 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
     733     1357066 :             buffer_rec(rec_counter)%msg = 0.0_dp
     734         974 :             buffer_rec(rec_counter)%proc = proc_receive
     735             :             ! post the message to be received
     736             :             CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
     737         974 :                                 buffer_rec(rec_counter)%msg_req)
     738             :          END IF
     739             :       END DO
     740             : 
     741             :       ! send messages
     742        4324 :       ALLOCATE (req_send(number_of_send))
     743        1188 :       send_counter = 0
     744        2246 :       DO proc_shift = 1, para_env%num_pe - 1
     745        1058 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     746        2246 :          IF (map_send_size(proc_send) > 0) THEN
     747         974 :             send_counter = send_counter + 1
     748             :             CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
     749         974 :                                 buffer_send(send_counter)%msg_req)
     750         974 :             req_send(send_counter) = buffer_send(send_counter)%msg_req
     751             :          END IF
     752             :       END DO
     753        1188 :       CALL timestop(handle2)
     754             : 
     755             :       ! 6) fill the fm_mat matrix with the messages received from the
     756             :       !    other processes
     757        1188 :       CALL timeset(routineN//"_fill", handle2)
     758             :       ! In order to perform this step efficiently first we have to know the
     759             :       ! ranges of the blocks over which a given process hold its local data.
     760             :       ! Start with the rows ...
     761        1188 :       my_num_row_blocks = 1
     762       45847 :       DO iiB = 1, nrow_local - 1
     763       44659 :          IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
     764       45847 :          my_num_row_blocks = my_num_row_blocks + 1
     765             :       END DO
     766        3564 :       ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
     767        5049 :       blocks_ranges_row = 0
     768        1188 :       blocks_ranges_row(1, 1) = row_indices(1)
     769        1188 :       iii = 1
     770       45847 :       DO iiB = 1, nrow_local - 1
     771       44659 :          IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
     772          99 :          iii = iii + 1
     773          99 :          blocks_ranges_row(2, iii - 1) = row_indices(iiB)
     774       45847 :          blocks_ranges_row(1, iii) = row_indices(iiB + 1)
     775             :       END DO
     776        1188 :       blocks_ranges_row(2, my_num_row_blocks) = row_indices(MAX(nrow_local, 1))
     777             :       ! ... and columns
     778        1188 :       my_num_col_blocks = 1
     779      110544 :       DO jjB = 1, ncol_local - 1
     780      109356 :          IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
     781      110544 :          my_num_col_blocks = my_num_col_blocks + 1
     782             :       END DO
     783        3564 :       ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
     784        4752 :       blocks_ranges_col = 0
     785        1188 :       blocks_ranges_col(1, 1) = col_indices(1)
     786        1188 :       iii = 1
     787      110544 :       DO jjB = 1, ncol_local - 1
     788      109356 :          IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
     789           0 :          iii = iii + 1
     790           0 :          blocks_ranges_col(2, iii - 1) = col_indices(jjB)
     791      110544 :          blocks_ranges_col(1, iii) = col_indices(jjB + 1)
     792             :       END DO
     793        1188 :       blocks_ranges_col(2, my_num_col_blocks) = col_indices(MAX(ncol_local, 1))
     794             : 
     795        1188 :       rec_counter = 0
     796        2246 :       DO proc_shift = 1, para_env%num_pe - 1
     797        1058 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     798        1058 :          size_rec_buffer = map_rec_size(proc_receive)
     799             : 
     800        2246 :          IF (map_rec_size(proc_receive) > 0) THEN
     801         974 :             rec_counter = rec_counter + 1
     802             : 
     803         974 :             CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)
     804             : 
     805             :             ! precompute the number of received columns and relative index
     806         974 :             num_rec_cols = 0
     807        1948 :             DO jjB = 1, my_num_col_blocks
     808         974 :                start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
     809         974 :                end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
     810       44929 :                DO j_sub = start_col_block, end_col_block
     811       43955 :                   num_rec_cols = num_rec_cols + 1
     812             :                END DO
     813             :             END DO
     814        2922 :             ALLOCATE (index_col_rec(num_rec_cols))
     815       43955 :             index_col_rec = 0
     816         974 :             iii = 0
     817        1948 :             DO jjB = 1, my_num_col_blocks
     818         974 :                start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
     819         974 :                end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
     820       44929 :                DO j_sub = start_col_block, end_col_block
     821       42981 :                   iii = iii + 1
     822       42981 :                   j_local = fm_mat%matrix_struct%g2l_col(j_sub)
     823       43955 :                   index_col_rec(iii) = j_local
     824             :                END DO
     825             :             END DO
     826             : 
     827         974 :             CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)
     828             : 
     829             :             ! wait for the message
     830         974 :             CALL buffer_rec(rec_counter)%msg_req%wait()
     831             : 
     832             :             ! fill the local data in fm_mat
     833         974 :             iii = 0
     834        2047 :             DO iiB = 1, my_num_row_blocks
     835        1073 :                start_row_block = MAX(blocks_ranges_row(1, iiB), rec_row_start)
     836        1073 :                end_row_block = MIN(blocks_ranges_row(2, iiB), rec_row_end)
     837       32666 :                DO i_sub = start_row_block, end_row_block
     838       30619 :                   i_local = fm_mat%matrix_struct%g2l_row(i_sub)
     839     1387784 :                   DO jjB = 1, num_rec_cols
     840     1356092 :                      iii = iii + 1
     841     1356092 :                      j_local = index_col_rec(jjB)
     842     1386711 :                      fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
     843             :                   END DO
     844             :                END DO
     845             :             END DO
     846             : 
     847         974 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
     848        2922 :             DEALLOCATE (index_col_rec)
     849             :          END IF
     850             :       END DO
     851        2162 :       DEALLOCATE (buffer_rec)
     852             : 
     853        1188 :       DEALLOCATE (blocks_ranges_row)
     854        1188 :       DEALLOCATE (blocks_ranges_col)
     855             : 
     856        1188 :       CALL timestop(handle2)
     857             : 
     858             :       ! 7) Finally wait for all messeges to be sent
     859        1188 :       CALL timeset(routineN//"_waitall", handle2)
     860        1188 :       CALL mp_waitall(req_send(:))
     861        2162 :       DO send_counter = 1, number_of_send
     862        2162 :          DEALLOCATE (buffer_send(send_counter)%msg)
     863             :       END DO
     864        2162 :       DEALLOCATE (buffer_send)
     865        1188 :       CALL timestop(handle2)
     866             : 
     867        1188 :       DEALLOCATE (map_send_size)
     868        1188 :       DEALLOCATE (map_rec_size)
     869        1188 :       DEALLOCATE (grid_2_mepos)
     870        1188 :       DEALLOCATE (mepos_2_grid)
     871             : 
     872        1188 :       CALL timestop(handle)
     873             : 
     874        9504 :    END SUBROUTINE array2fm
     875             : 
     876             : ! **************************************************************************************************
     877             : !> \brief redistribute fm to local part of array
     878             : !> \param mat2D ...
     879             : !> \param my_rows ...
     880             : !> \param my_start_row ...
     881             : !> \param my_end_row ...
     882             : !> \param my_cols ...
     883             : !> \param my_start_col ...
     884             : !> \param my_end_col ...
     885             : !> \param group_grid_2_mepos ...
     886             : !> \param mepos_2_grid_group ...
     887             : !> \param ngroup_row ...
     888             : !> \param ngroup_col ...
     889             : !> \param fm_mat ...
     890             : ! **************************************************************************************************
     891         694 :    SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
     892             :                        my_cols, my_start_col, my_end_col, &
     893             :                        group_grid_2_mepos, mepos_2_grid_group, &
     894             :                        ngroup_row, ngroup_col, &
     895             :                        fm_mat)
     896             : 
     897             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     898             :          INTENT(OUT)                                     :: mat2D
     899             :       INTEGER, INTENT(IN)                                :: my_rows, my_start_row, my_end_row, &
     900             :                                                             my_cols, my_start_col, my_end_col
     901             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN)  :: group_grid_2_mepos, mepos_2_grid_group
     902             :       INTEGER, INTENT(IN)                                :: ngroup_row, ngroup_col
     903             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_mat
     904             : 
     905             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'fm2array'
     906             : 
     907             :       INTEGER :: dummy_proc, handle, handle2, i_global, iiB, iii, itmp(2), j_global, jjB, mypcol, &
     908             :          myprow, ncol_local, npcol, nprow, nrow_local, num_cols, num_rec_rows, num_rows, &
     909             :          number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_col_size, &
     910             :          rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, ref_send_prow, &
     911             :          send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
     912         694 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, index_row_rec, map_rec_size, &
     913         694 :                                                             map_send_size
     914         694 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: grid_2_mepos, grid_ref_2_send_pos, &
     915         694 :                                                             mepos_2_grid, sizes
     916         694 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     917             :       REAL(KIND=dp)                                      :: part_col, part_row
     918             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     919         694 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     920             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     921         694 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     922             : 
     923         694 :       CALL timeset(routineN, handle)
     924             : 
     925             :       ! allocate the array
     926        2776 :       ALLOCATE (mat2D(my_rows, my_cols))
     927     2155176 :       mat2D = 0.0_dp
     928             : 
     929             :       ! start procedure
     930             :       ! get information of from the full matrix
     931             :       CALL cp_fm_get_info(matrix=fm_mat, &
     932             :                           nrow_local=nrow_local, &
     933             :                           ncol_local=ncol_local, &
     934             :                           row_indices=row_indices, &
     935             :                           col_indices=col_indices, &
     936             :                           nrow_global=num_rows, &
     937             :                           ncol_global=num_cols, &
     938         694 :                           para_env=para_env)
     939         694 :       myprow = fm_mat%matrix_struct%context%mepos(1)
     940         694 :       mypcol = fm_mat%matrix_struct%context%mepos(2)
     941         694 :       nprow = fm_mat%matrix_struct%context%num_pe(1)
     942         694 :       npcol = fm_mat%matrix_struct%context%num_pe(2)
     943             : 
     944             :       ! start the communication part
     945             :       ! 0) create array containing the processes position
     946             :       !    and supporting infos
     947         694 :       CALL timeset(routineN//"_info", handle2)
     948        2776 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     949        2776 :       grid_2_mepos = 0
     950        2082 :       ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
     951             : 
     952             :       ! fill the info array
     953         694 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     954             : 
     955             :       ! sum infos
     956         694 :       CALL para_env%sum(grid_2_mepos)
     957        2082 :       CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
     958             : 
     959             :       ! 1) loop over my local data and define a map for the proc to send data
     960        2082 :       ALLOCATE (map_send_size(0:para_env%num_pe - 1))
     961        2082 :       map_send_size = 0
     962         694 :       part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
     963         694 :       part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
     964             : 
     965       59132 :       DO jjB = 1, ncol_local
     966       58438 :          j_global = col_indices(jjB)
     967             :          ! check the group holding this element
     968             :          ! dirty way, if someone has a better idea ...
     969       58438 :          send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
     970       58438 :          send_pcol = MAX(0, send_pcol)
     971       58438 :          send_pcol = MIN(send_pcol, ngroup_col - 1)
     972             :          DO
     973       58438 :             itmp = get_limit(num_cols, ngroup_col, send_pcol)
     974       58438 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
     975           0 :             IF (j_global < itmp(1)) send_pcol = send_pcol - 1
     976       58438 :             IF (j_global > itmp(2)) send_pcol = send_pcol + 1
     977             :          END DO
     978             : 
     979     2182959 :          DO iiB = 1, nrow_local
     980     2123827 :             i_global = row_indices(iiB)
     981             :             ! check the process in the group holding this element
     982     2123827 :             send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
     983     2123827 :             send_prow = MAX(0, send_prow)
     984     2123827 :             send_prow = MIN(send_prow, ngroup_row - 1)
     985             :             DO
     986     2123827 :                itmp = get_limit(num_rows, ngroup_row, send_prow)
     987     2123827 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
     988           0 :                IF (i_global < itmp(1)) send_prow = send_prow - 1
     989     2123827 :                IF (i_global > itmp(2)) send_prow = send_prow + 1
     990             :             END DO
     991             : 
     992     2123827 :             proc_send = group_grid_2_mepos(send_prow, send_pcol)
     993             : 
     994     2182265 :             map_send_size(proc_send) = map_send_size(proc_send) + 1
     995             : 
     996             :          END DO ! i_global
     997             :       END DO ! j_global
     998             : 
     999             :       ! 2) loop over my local data of the array and define a map for the proc from which rec data
    1000        2082 :       ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
    1001        2082 :       map_rec_size = 0
    1002         694 :       dummy_proc = 0
    1003       31349 :       DO jjB = my_start_col, my_end_col
    1004       30655 :          rec_pcol = fm_mat%matrix_struct%g2p_col(jjB)
    1005     2155176 :          DO iiB = my_start_row, my_end_row
    1006     2123827 :             rec_prow = fm_mat%matrix_struct%g2p_row(iiB)
    1007     2123827 :             proc_receive = grid_2_mepos(rec_prow, rec_pcol)
    1008     2154482 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
    1009             :          END DO
    1010             :       END DO
    1011             : 
    1012             :       ! 3) check if the local data in the fm_mat has to be stored in the new array
    1013         694 :       IF (map_rec_size(para_env%mepos) > 0) THEN
    1014       59132 :          DO jjB = 1, ncol_local
    1015       58438 :             j_global = col_indices(jjB)
    1016       59132 :             IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
    1017     1157441 :                DO iiB = 1, nrow_local
    1018     1126786 :                   i_global = row_indices(iiB)
    1019     1157441 :                   IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
    1020     1125906 :                      mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iiB, jjB)
    1021             :                   END IF
    1022             :                END DO
    1023             :             END IF
    1024             :          END DO
    1025             :       END IF
    1026         694 :       CALL timestop(handle2)
    1027             : 
    1028             :       ! 4) reorder data in the send_buffer
    1029         694 :       CALL timeset(routineN//"_buffer_send", handle2)
    1030             :       ! count the number of messages to send
    1031         694 :       number_of_send = 0
    1032        1388 :       DO proc_shift = 1, para_env%num_pe - 1
    1033         694 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1034        1388 :          IF (map_send_size(proc_send) > 0) THEN
    1035         664 :             number_of_send = number_of_send + 1
    1036             :          END IF
    1037             :       END DO
    1038             :       ! allocate the structure that will hold the messages to be sent
    1039        2716 :       ALLOCATE (buffer_send(number_of_send))
    1040             :       ! grid_ref_2_send_pos is an array (map) that given a pair
    1041             :       ! (ref_send_prow,ref_send_pcol) returns
    1042             :       ! the position in the buffer_send associated to that process
    1043             : 
    1044        2776 :       ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
    1045        3438 :       grid_ref_2_send_pos = 0
    1046             : 
    1047             :       ! finalize the allocation of buffer_send with the actual size
    1048             :       ! of each message (actual size is size_send_buffer)
    1049         694 :       send_counter = 0
    1050        1388 :       DO proc_shift = 1, para_env%num_pe - 1
    1051         694 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1052         694 :          size_send_buffer = map_send_size(proc_send)
    1053        1388 :          IF (map_send_size(proc_send) > 0) THEN
    1054         664 :             send_counter = send_counter + 1
    1055             :             ! allocate the sending buffer (msg)
    1056        1992 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
    1057      998585 :             buffer_send(send_counter)%msg = 0.0_dp
    1058         664 :             buffer_send(send_counter)%proc = proc_send
    1059             :             ! get the pointer to prow, pcol of the process that has
    1060             :             ! to receive this message
    1061         664 :             ref_send_prow = mepos_2_grid_group(1, proc_send)
    1062         664 :             ref_send_pcol = mepos_2_grid_group(2, proc_send)
    1063             :             ! save the rank of the process that has to receive this message
    1064         664 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
    1065             :          END IF
    1066             :       END DO
    1067             : 
    1068             :       ! loop over the locally held data and fill the buffer_send
    1069             :       ! for doing that we need an array that keep track if the
    1070             :       ! sequential increase of the index for each message
    1071        2052 :       ALLOCATE (iii_vet(number_of_send))
    1072        1358 :       iii_vet = 0
    1073       59132 :       DO jjB = 1, ncol_local
    1074       58438 :          j_global = col_indices(jjB)
    1075             :          ! check the group holding this element
    1076             :          ! dirty way, if someone has a better idea ...
    1077       58438 :          send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
    1078       58438 :          send_pcol = MAX(0, send_pcol)
    1079       58438 :          send_pcol = MIN(send_pcol, ngroup_col - 1)
    1080             :          DO
    1081       58438 :             itmp = get_limit(num_cols, ngroup_col, send_pcol)
    1082       58438 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
    1083           0 :             IF (j_global < itmp(1)) send_pcol = send_pcol - 1
    1084       58438 :             IF (j_global > itmp(2)) send_pcol = send_pcol + 1
    1085             :          END DO
    1086             : 
    1087     2182959 :          DO iiB = 1, nrow_local
    1088     2123827 :             i_global = row_indices(iiB)
    1089             :             ! check the process in the group holding this element
    1090     2123827 :             send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
    1091     2123827 :             send_prow = MAX(0, send_prow)
    1092     2123827 :             send_prow = MIN(send_prow, ngroup_row - 1)
    1093             :             DO
    1094     2123827 :                itmp = get_limit(num_rows, ngroup_row, send_prow)
    1095     2123827 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
    1096           0 :                IF (i_global < itmp(1)) send_prow = send_prow - 1
    1097     2123827 :                IF (i_global > itmp(2)) send_prow = send_prow + 1
    1098             :             END DO
    1099             :             ! we don't need to send to ourselves
    1100     2123827 :             IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
    1101             : 
    1102      997921 :             send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
    1103      997921 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
    1104      997921 :             iii = iii_vet(send_counter)
    1105     2182265 :             buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iiB, jjB)
    1106             :          END DO
    1107             :       END DO
    1108             : 
    1109         694 :       DEALLOCATE (iii_vet)
    1110         694 :       DEALLOCATE (grid_ref_2_send_pos)
    1111         694 :       CALL timestop(handle2)
    1112             : 
    1113             :       ! 5) similarly to what done for the buffer_send
    1114             :       !    create the buffer for receive, post the message with irecv
    1115             :       !    and send the messages with non-blocking
    1116         694 :       CALL timeset(routineN//"_isendrecv", handle2)
    1117             :       ! count the number of messages to be received
    1118         694 :       number_of_rec = 0
    1119        1388 :       DO proc_shift = 1, para_env%num_pe - 1
    1120         694 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1121        1388 :          IF (map_rec_size(proc_receive) > 0) THEN
    1122         664 :             number_of_rec = number_of_rec + 1
    1123             :          END IF
    1124             :       END DO
    1125             : 
    1126        2716 :       ALLOCATE (buffer_rec(number_of_rec))
    1127             : 
    1128         694 :       rec_counter = 0
    1129        1388 :       DO proc_shift = 1, para_env%num_pe - 1
    1130         694 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1131         694 :          size_rec_buffer = map_rec_size(proc_receive)
    1132        1388 :          IF (map_rec_size(proc_receive) > 0) THEN
    1133         664 :             rec_counter = rec_counter + 1
    1134             :             ! prepare the buffer for receive
    1135        1992 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
    1136      998585 :             buffer_rec(rec_counter)%msg = 0.0_dp
    1137         664 :             buffer_rec(rec_counter)%proc = proc_receive
    1138             :             ! post the message to be received
    1139             :             CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
    1140         664 :                                 buffer_rec(rec_counter)%msg_req)
    1141             :          END IF
    1142             :       END DO
    1143             : 
    1144             :       ! send messages
    1145        2716 :       ALLOCATE (req_send(number_of_send))
    1146         694 :       send_counter = 0
    1147        1388 :       DO proc_shift = 1, para_env%num_pe - 1
    1148         694 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1149        1388 :          IF (map_send_size(proc_send) > 0) THEN
    1150         664 :             send_counter = send_counter + 1
    1151             :             CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
    1152         664 :                                 buffer_send(send_counter)%msg_req)
    1153         664 :             req_send(send_counter) = buffer_send(send_counter)%msg_req
    1154             :          END IF
    1155             :       END DO
    1156         694 :       CALL timestop(handle2)
    1157             : 
    1158             :       ! 6) fill the fm_mat matrix with the messages received from the
    1159             :       !    other processes
    1160         694 :       CALL timeset(routineN//"_fill", handle2)
    1161             :       CALL cp_fm_get_info(matrix=fm_mat, &
    1162             :                           nrow_local=nrow_local, &
    1163         694 :                           ncol_local=ncol_local)
    1164        2082 :       ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
    1165        2082 :       CALL para_env%allgather([nrow_local, ncol_local], sizes)
    1166        2082 :       iiB = MAXVAL(sizes(1, :))
    1167        2082 :       ALLOCATE (index_row_rec(iiB))
    1168       25488 :       index_row_rec = 0
    1169         694 :       rec_counter = 0
    1170        1388 :       DO proc_shift = 1, para_env%num_pe - 1
    1171         694 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1172         694 :          size_rec_buffer = map_rec_size(proc_receive)
    1173             : 
    1174        1388 :          IF (map_rec_size(proc_receive) > 0) THEN
    1175         664 :             rec_counter = rec_counter + 1
    1176             : 
    1177         664 :             rec_col_size = sizes(2, proc_receive)
    1178         664 :             rec_row_size = sizes(1, proc_receive)
    1179             : 
    1180             :             ! precompute the indeces of the rows
    1181         664 :             num_rec_rows = 0
    1182       23969 :             DO iiB = 1, rec_row_size
    1183       23305 :                i_global = fm_mat%matrix_struct%l2g_row(iiB, mepos_2_grid(1, proc_receive))
    1184       23969 :                IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
    1185       23180 :                   num_rec_rows = num_rec_rows + 1
    1186       23180 :                   index_row_rec(num_rec_rows) = i_global
    1187             :                END IF
    1188             :             END DO
    1189             : 
    1190         664 :             CALL buffer_rec(rec_counter)%msg_req%wait()
    1191             : 
    1192         664 :             iii = 0
    1193       56450 :             DO jjB = 1, rec_col_size
    1194       55786 :                j_global = fm_mat%matrix_struct%l2g_col(jjB, mepos_2_grid(2, proc_receive))
    1195       56450 :                IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
    1196     1025924 :                   DO iiB = 1, num_rec_rows
    1197      997921 :                      i_global = index_row_rec(iiB)
    1198      997921 :                      iii = iii + 1
    1199     1025924 :                      mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
    1200             :                   END DO
    1201             :                END IF
    1202             :             END DO
    1203             : 
    1204         664 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
    1205             :          END IF
    1206             :       END DO
    1207        1358 :       DEALLOCATE (buffer_rec)
    1208         694 :       DEALLOCATE (index_row_rec)
    1209         694 :       CALL cp_fm_release(fm_mat)
    1210         694 :       CALL timestop(handle2)
    1211             : 
    1212             :       ! 7) Finally wait for all messeges to be sent
    1213         694 :       CALL timeset(routineN//"_waitall", handle2)
    1214         694 :       CALL mp_waitall(req_send(:))
    1215        1358 :       DO send_counter = 1, number_of_send
    1216        1358 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1217             :       END DO
    1218        1358 :       DEALLOCATE (buffer_send)
    1219         694 :       CALL timestop(handle2)
    1220             : 
    1221         694 :       CALL timestop(handle)
    1222             : 
    1223        4858 :    END SUBROUTINE fm2array
    1224             : 
    1225             : ! **************************************************************************************************
    1226             : !> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
    1227             : !> \param Gamma_2D ...
    1228             : !> \param homo ...
    1229             : !> \param virtual ...
    1230             : !> \param dimen_ia ...
    1231             : !> \param para_env_sub ...
    1232             : !> \param my_ia_start ...
    1233             : !> \param my_ia_end ...
    1234             : !> \param my_group_L_size ...
    1235             : !> \param gd_ia ...
    1236             : !> \param dbcsr_Gamma ...
    1237             : !> \param mo_coeff_o ...
    1238             : ! **************************************************************************************************
    1239         344 :    SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
    1240             :                                  my_ia_start, my_ia_end, my_group_L_size, &
    1241         344 :                                  gd_ia, dbcsr_Gamma, mo_coeff_o)
    1242             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: Gamma_2D
    1243             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen_ia
    1244             :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env_sub
    1245             :       INTEGER, INTENT(IN)                                :: my_ia_start, my_ia_end, my_group_L_size
    1246             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_ia
    1247             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: dbcsr_Gamma
    1248             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mo_coeff_o
    1249             : 
    1250             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'create_dbcsr_gamma'
    1251             : 
    1252             :       INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iiB, iii, itmp(2), j_global, &
    1253             :          j_local, jjB, jjj, kkB, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, &
    1254             :          number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_counter, &
    1255             :          rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, &
    1256             :          ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
    1257         344 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, map_rec_size, map_send_size
    1258         344 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: grid_2_mepos, grid_ref_2_send_pos, &
    1259         344 :                                                             indeces_map_my, mepos_2_grid
    1260         344 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1261             :       REAL(KIND=dp)                                      :: part_ia
    1262             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1263             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
    1264             :       TYPE(cp_fm_type)                                   :: fm_ia
    1265         344 :       TYPE(index_map), ALLOCATABLE, DIMENSION(:)         :: indeces_rec
    1266             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
    1267         344 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
    1268         344 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
    1269             : 
    1270         344 :       CALL timeset(routineN, handle)
    1271             : 
    1272             :       ! create sub blacs env
    1273         344 :       NULLIFY (blacs_env)
    1274         344 :       CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)
    1275             : 
    1276             :       ! create the fm_ia buffer matrix
    1277         344 :       NULLIFY (fm_struct)
    1278             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
    1279         344 :                                ncol_global=virtual, para_env=para_env_sub)
    1280         344 :       CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
    1281             :       ! release structure
    1282         344 :       CALL cp_fm_struct_release(fm_struct)
    1283             :       ! release blacs_env
    1284         344 :       CALL cp_blacs_env_release(blacs_env)
    1285             : 
    1286             :       ! get array information
    1287             :       CALL cp_fm_get_info(matrix=fm_ia, &
    1288             :                           nrow_local=nrow_local, &
    1289             :                           ncol_local=ncol_local, &
    1290             :                           row_indices=row_indices, &
    1291         344 :                           col_indices=col_indices)
    1292         344 :       myprow = fm_ia%matrix_struct%context%mepos(1)
    1293         344 :       mypcol = fm_ia%matrix_struct%context%mepos(2)
    1294         344 :       nprow = fm_ia%matrix_struct%context%num_pe(1)
    1295         344 :       npcol = fm_ia%matrix_struct%context%num_pe(2)
    1296             : 
    1297             :       ! 0) create array containing the processes position and supporting infos
    1298        1376 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
    1299        1048 :       grid_2_mepos = 0
    1300        1032 :       ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
    1301             :       ! fill the info array
    1302         344 :       grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
    1303             :       ! sum infos
    1304         344 :       CALL para_env_sub%sum(grid_2_mepos)
    1305        1032 :       CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
    1306             : 
    1307             :       ! loop over local index range and define the sending map
    1308        1032 :       ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
    1309         704 :       map_send_size = 0
    1310         344 :       dummy_proc = 0
    1311       19420 :       DO iaia = my_ia_start, my_ia_end
    1312       19076 :          i_global = (iaia - 1)/virtual + 1
    1313       19076 :          j_global = MOD(iaia - 1, virtual) + 1
    1314       19076 :          send_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1315       19076 :          send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1316       19076 :          proc_send = grid_2_mepos(send_prow, send_pcol)
    1317       19420 :          map_send_size(proc_send) = map_send_size(proc_send) + 1
    1318             :       END DO
    1319             : 
    1320             :       ! loop over local data of fm_ia and define the receiving map
    1321        1032 :       ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
    1322         704 :       map_rec_size = 0
    1323         344 :       part_ia = REAL(dimen_ia, KIND=dp)/REAL(para_env_sub%num_pe, KIND=dp)
    1324             : 
    1325        1556 :       DO iiB = 1, nrow_local
    1326        1212 :          i_global = row_indices(iiB)
    1327       20632 :          DO jjB = 1, ncol_local
    1328       19076 :             j_global = col_indices(jjB)
    1329       19076 :             iaia = (i_global - 1)*virtual + j_global
    1330       19076 :             proc_receive = INT(REAL(iaia - 1, KIND=dp)/part_ia)
    1331       19076 :             proc_receive = MAX(0, proc_receive)
    1332       19076 :             proc_receive = MIN(proc_receive, para_env_sub%num_pe - 1)
    1333             :             DO
    1334       19076 :                itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
    1335       19076 :                IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
    1336           0 :                IF (iaia < itmp(1)) proc_receive = proc_receive - 1
    1337       19076 :                IF (iaia > itmp(2)) proc_receive = proc_receive + 1
    1338             :             END DO
    1339       20288 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
    1340             :          END DO
    1341             :       END DO
    1342             : 
    1343             :       ! allocate the buffer for sending data
    1344         344 :       number_of_send = 0
    1345         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1346          16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1347         360 :          IF (map_send_size(proc_send) > 0) THEN
    1348           2 :             number_of_send = number_of_send + 1
    1349             :          END IF
    1350             :       END DO
    1351             :       ! allocate the structure that will hold the messages to be sent
    1352         692 :       ALLOCATE (buffer_send(number_of_send))
    1353             :       ! and the map from the grid of processess to the message position
    1354        1032 :       ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
    1355        1048 :       grid_ref_2_send_pos = 0
    1356             :       ! finally allocate each message
    1357         344 :       send_counter = 0
    1358         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1359          16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1360          16 :          size_send_buffer = map_send_size(proc_send)
    1361         360 :          IF (map_send_size(proc_send) > 0) THEN
    1362           2 :             send_counter = send_counter + 1
    1363             :             ! allocate the sending buffer (msg)
    1364           6 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
    1365           2 :             buffer_send(send_counter)%proc = proc_send
    1366             :             ! get the pointer to prow, pcol of the process that has
    1367             :             ! to receive this message
    1368           2 :             ref_send_prow = mepos_2_grid(1, proc_send)
    1369           2 :             ref_send_pcol = mepos_2_grid(2, proc_send)
    1370             :             ! save the rank of the process that has to receive this message
    1371           2 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
    1372             :          END IF
    1373             :       END DO
    1374             : 
    1375             :       ! allocate the buffer for receiving data
    1376             :       number_of_rec = 0
    1377         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1378          16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1379         360 :          IF (map_rec_size(proc_receive) > 0) THEN
    1380           2 :             number_of_rec = number_of_rec + 1
    1381             :          END IF
    1382             :       END DO
    1383             :       ! allocate the structure that will hold the messages to be received
    1384             :       ! and relative indeces
    1385         692 :       ALLOCATE (buffer_rec(number_of_rec))
    1386         692 :       ALLOCATE (indeces_rec(number_of_rec))
    1387             :       ! finally allocate each message and fill the array of indeces
    1388         344 :       rec_counter = 0
    1389         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1390          16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1391          16 :          size_rec_buffer = map_rec_size(proc_receive)
    1392         360 :          IF (map_rec_size(proc_receive) > 0) THEN
    1393           2 :             rec_counter = rec_counter + 1
    1394             :             ! prepare the buffer for receive
    1395           6 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
    1396           2 :             buffer_rec(rec_counter)%proc = proc_receive
    1397             :             ! create the indeces array
    1398           6 :             ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
    1399          59 :             indeces_rec(rec_counter)%map = 0
    1400           2 :             CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
    1401           2 :             iii = 0
    1402         120 :             DO iaia = rec_iaia_start, rec_iaia_end
    1403         118 :                i_global = (iaia - 1)/virtual + 1
    1404         118 :                j_global = MOD(iaia - 1, virtual) + 1
    1405         118 :                rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1406         118 :                rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1407         118 :                IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
    1408          19 :                iii = iii + 1
    1409          19 :                i_local = fm_ia%matrix_struct%g2l_row(i_global)
    1410          19 :                j_local = fm_ia%matrix_struct%g2l_col(j_global)
    1411          19 :                indeces_rec(rec_counter)%map(1, iii) = i_local
    1412         120 :                indeces_rec(rec_counter)%map(2, iii) = j_local
    1413             :             END DO
    1414             :          END IF
    1415             :       END DO
    1416             :       ! and create the index map for my local data
    1417         344 :       IF (map_rec_size(para_env_sub%mepos) > 0) THEN
    1418         344 :          size_rec_buffer = map_rec_size(para_env_sub%mepos)
    1419        1032 :          ALLOCATE (indeces_map_my(2, size_rec_buffer))
    1420       57515 :          indeces_map_my = 0
    1421             :          iii = 0
    1422       19420 :          DO iaia = my_ia_start, my_ia_end
    1423       19076 :             i_global = (iaia - 1)/virtual + 1
    1424       19076 :             j_global = MOD(iaia - 1, virtual) + 1
    1425       19076 :             rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1426       19076 :             rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1427       19076 :             IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
    1428       19057 :             iii = iii + 1
    1429       19057 :             i_local = fm_ia%matrix_struct%g2l_row(i_global)
    1430       19057 :             j_local = fm_ia%matrix_struct%g2l_col(j_global)
    1431       19057 :             indeces_map_my(1, iii) = i_local
    1432       19420 :             indeces_map_my(2, iii) = j_local
    1433             :          END DO
    1434             :       END IF
    1435             : 
    1436             :       ! auxiliary vector of indeces for the send buffer
    1437         690 :       ALLOCATE (iii_vet(number_of_send))
    1438             :       ! vector for the send requests
    1439         692 :       ALLOCATE (req_send(number_of_send))
    1440             :       ! loop over auxiliary basis function and redistribute into a fm
    1441             :       ! and then compy the fm into a dbcsr matrix
    1442       15575 :       DO kkB = 1, my_group_L_size
    1443             :          ! zero the matries of the buffers and post the messages to be received
    1444       15231 :          CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
    1445       15231 :          rec_counter = 0
    1446       16667 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1447        1436 :             proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1448       16667 :             IF (map_rec_size(proc_receive) > 0) THEN
    1449         220 :                rec_counter = rec_counter + 1
    1450        2310 :                buffer_rec(rec_counter)%msg = 0.0_dp
    1451             :                CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
    1452         220 :                                        buffer_rec(rec_counter)%msg_req)
    1453             :             END IF
    1454             :          END DO
    1455             :          ! fill the sending buffer and send the messages
    1456       15451 :          DO send_counter = 1, number_of_send
    1457       17541 :             buffer_send(send_counter)%msg = 0.0_dp
    1458             :          END DO
    1459       15451 :          iii_vet = 0
    1460             :          jjj = 0
    1461      875195 :          DO iaia = my_ia_start, my_ia_end
    1462      859964 :             i_global = (iaia - 1)/virtual + 1
    1463      859964 :             j_global = MOD(iaia - 1, virtual) + 1
    1464      859964 :             send_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1465      859964 :             send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1466      859964 :             proc_send = grid_2_mepos(send_prow, send_pcol)
    1467             :             ! we don't need to send to ourselves
    1468      875195 :             IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
    1469             :                ! filling fm_ia with local data
    1470      857874 :                jjj = jjj + 1
    1471      857874 :                i_local = indeces_map_my(1, jjj)
    1472      857874 :                j_local = indeces_map_my(2, jjj)
    1473      857874 :                fm_ia%local_data(i_local, j_local) = Gamma_2D(iaia - my_ia_start + 1, kkB)
    1474             :             ELSE
    1475        2090 :                send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
    1476        2090 :                iii_vet(send_counter) = iii_vet(send_counter) + 1
    1477        2090 :                iii = iii_vet(send_counter)
    1478        2090 :                buffer_send(send_counter)%msg(iii) = Gamma_2D(iaia - my_ia_start + 1, kkB)
    1479             :             END IF
    1480             :          END DO
    1481       15451 :          req_send = mp_request_null
    1482       15231 :          send_counter = 0
    1483       16667 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1484        1436 :             proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1485       16667 :             IF (map_send_size(proc_send) > 0) THEN
    1486         220 :                send_counter = send_counter + 1
    1487             :                CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
    1488         220 :                                        buffer_send(send_counter)%msg_req)
    1489         220 :                req_send(send_counter) = buffer_send(send_counter)%msg_req
    1490             :             END IF
    1491             :          END DO
    1492             : 
    1493             :          ! receive the massages and fill the fm_ia
    1494       15231 :          rec_counter = 0
    1495       16667 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1496        1436 :             proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1497        1436 :             size_rec_buffer = map_rec_size(proc_receive)
    1498       16667 :             IF (map_rec_size(proc_receive) > 0) THEN
    1499         220 :                rec_counter = rec_counter + 1
    1500             :                ! wait for the message
    1501         220 :                CALL buffer_rec(rec_counter)%msg_req%wait()
    1502        2310 :                DO iii = 1, size_rec_buffer
    1503        2090 :                   i_local = indeces_rec(rec_counter)%map(1, iii)
    1504        2090 :                   j_local = indeces_rec(rec_counter)%map(2, iii)
    1505        2310 :                   fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1506             :                END DO
    1507             :             END IF
    1508             :          END DO
    1509             : 
    1510             :          ! wait all
    1511       15231 :          CALL mp_waitall(req_send(:))
    1512             : 
    1513             :          ! now create the DBCSR matrix and copy fm_ia into it
    1514       15231 :          ALLOCATE (dbcsr_Gamma(kkB)%matrix)
    1515             :          CALL cp_dbcsr_m_by_n_from_template(dbcsr_Gamma(kkB)%matrix, &
    1516             :                                             template=mo_coeff_o, &
    1517       15231 :                                             m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
    1518       15575 :          CALL copy_fm_to_dbcsr(fm_ia, dbcsr_Gamma(kkB)%matrix, keep_sparsity=.FALSE.)
    1519             : 
    1520             :       END DO
    1521             : 
    1522             :       ! deallocate stuff
    1523         344 :       DEALLOCATE (Gamma_2D)
    1524         344 :       DEALLOCATE (iii_vet)
    1525         344 :       DEALLOCATE (req_send)
    1526         344 :       IF (map_rec_size(para_env_sub%mepos) > 0) THEN
    1527         344 :          DEALLOCATE (indeces_map_my)
    1528             :       END IF
    1529         346 :       DO rec_counter = 1, number_of_rec
    1530           2 :          DEALLOCATE (indeces_rec(rec_counter)%map)
    1531         346 :          DEALLOCATE (buffer_rec(rec_counter)%msg)
    1532             :       END DO
    1533         346 :       DEALLOCATE (indeces_rec)
    1534         346 :       DEALLOCATE (buffer_rec)
    1535         346 :       DO send_counter = 1, number_of_send
    1536         346 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1537             :       END DO
    1538         346 :       DEALLOCATE (buffer_send)
    1539         344 :       DEALLOCATE (map_send_size)
    1540         344 :       DEALLOCATE (map_rec_size)
    1541         344 :       DEALLOCATE (grid_2_mepos)
    1542         344 :       DEALLOCATE (mepos_2_grid)
    1543             : 
    1544             :       ! release buffer matrix
    1545         344 :       CALL cp_fm_release(fm_ia)
    1546             : 
    1547         344 :       CALL timestop(handle)
    1548             : 
    1549        1376 :    END SUBROUTINE create_dbcsr_gamma
    1550             : 
    1551             : ! **************************************************************************************************
    1552             : !> \brief prepare array for redistribution
    1553             : !> \param para_env ...
    1554             : !> \param para_env_sub ...
    1555             : !> \param ngroup ...
    1556             : !> \param group_grid_2_mepos ...
    1557             : !> \param mepos_2_grid_group ...
    1558             : !> \param pos_info ...
    1559             : ! **************************************************************************************************
    1560         338 :    SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
    1561             :                                      group_grid_2_mepos, mepos_2_grid_group, &
    1562             :                                      pos_info)
    1563             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env, para_env_sub
    1564             :       INTEGER, INTENT(IN)                                :: ngroup
    1565             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
    1566             :       INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
    1567             :          OPTIONAL                                        :: pos_info
    1568             : 
    1569             :       INTEGER                                            :: i, pos_group, pos_sub
    1570         338 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: my_pos_info
    1571             : 
    1572        1014 :       ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
    1573         338 :       CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
    1574             : 
    1575        1352 :       ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
    1576        1674 :       group_grid_2_mepos = 0
    1577        1014 :       ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
    1578        2366 :       mepos_2_grid_group = 0
    1579             : 
    1580        1014 :       DO i = 0, para_env%num_pe - 1
    1581             :          ! calculate position of the group
    1582         676 :          pos_group = i/para_env_sub%num_pe
    1583             :          ! calculate position in the subgroup
    1584         676 :          pos_sub = my_pos_info(i)
    1585             :          ! fill the map from the grid of groups to process
    1586         676 :          group_grid_2_mepos(pos_sub, pos_group) = i
    1587             :          ! and the opposite, from the global pos to the grid pos
    1588         676 :          mepos_2_grid_group(1, i) = pos_sub
    1589        1014 :          mepos_2_grid_group(2, i) = pos_group
    1590             :       END DO
    1591             : 
    1592         338 :       IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
    1593             : 
    1594         338 :    END SUBROUTINE prepare_redistribution
    1595             : 
    1596           0 : END MODULE mp2_ri_grad_util

Generated by: LCOV version 1.15