LCOV - code coverage report
Current view: top level - src - library_tests.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b8e0b09) Lines: 624 703 88.8 %
Date: 2024-08-31 06:31:37 Functions: 11 11 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
      10             : !> \par History
      11             : !>      30-Nov-2000 (JGH) added input
      12             : !>      02-Jan-2001 (JGH) Parallel FFT
      13             : !>      28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
      14             : !>      06-Jun-2003 (JGH) Real space grid test
      15             : !>      Eigensolver test (29.08.05,MK)
      16             : !> \author JGH  6-NOV-2000
      17             : ! **************************************************************************************************
      18             : MODULE library_tests
      19             : 
      20             :    USE ai_coulomb_test,                 ONLY: eri_test
      21             :    USE cell_methods,                    ONLY: cell_create,&
      22             :                                               init_cell
      23             :    USE cell_types,                      ONLY: cell_release,&
      24             :                                               cell_type
      25             :    USE cg_test,                         ONLY: clebsch_gordon_test
      26             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
      27             :                                               cp_blacs_env_release,&
      28             :                                               cp_blacs_env_type
      29             :    USE cp_dbcsr_api,                    ONLY: dbcsr_reset_randmat_seed,&
      30             :                                               dbcsr_run_tests
      31             :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_perf_acc_test
      32             :    USE cp_files,                        ONLY: close_file,&
      33             :                                               open_file
      34             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_gemm
      35             :    USE cp_fm_diag,                      ONLY: cp_fm_syevd,&
      36             :                                               cp_fm_syevx
      37             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      38             :                                               cp_fm_struct_get,&
      39             :                                               cp_fm_struct_release,&
      40             :                                               cp_fm_struct_type
      41             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      42             :                                               cp_fm_pilaenv,&
      43             :                                               cp_fm_release,&
      44             :                                               cp_fm_set_all,&
      45             :                                               cp_fm_set_submatrix,&
      46             :                                               cp_fm_to_fm,&
      47             :                                               cp_fm_type
      48             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      49             :                                               cp_logger_type
      50             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      51             :                                               cp_print_key_unit_nr
      52             :    USE cp_realspace_grid_init,          ONLY: init_input_type
      53             :    USE dbm_tests,                       ONLY: dbm_run_tests
      54             :    USE fft_tools,                       ONLY: BWFFT,&
      55             :                                               FFT_RADIX_CLOSEST,&
      56             :                                               FWFFT,&
      57             :                                               fft3d,&
      58             :                                               fft_radix_operations,&
      59             :                                               finalize_fft,&
      60             :                                               init_fft
      61             :    USE global_types,                    ONLY: global_environment_type
      62             :    USE input_constants,                 ONLY: do_diag_syevd,&
      63             :                                               do_diag_syevx,&
      64             :                                               do_mat_random,&
      65             :                                               do_mat_read,&
      66             :                                               do_pwgrid_ns_fullspace,&
      67             :                                               do_pwgrid_ns_halfspace,&
      68             :                                               do_pwgrid_spherical
      69             :    USE input_section_types,             ONLY: section_vals_get,&
      70             :                                               section_vals_get_subs_vals,&
      71             :                                               section_vals_type,&
      72             :                                               section_vals_val_get
      73             :    USE kinds,                           ONLY: dp
      74             :    USE machine,                         ONLY: m_flush,&
      75             :                                               m_walltime
      76             :    USE message_passing,                 ONLY: mp_para_env_type
      77             :    USE minimax_exp,                     ONLY: validate_exp_minimax
      78             :    USE mp2_grids,                       ONLY: test_least_square_ft
      79             :    USE mp_perf_test,                    ONLY: mpi_perf_test
      80             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      81             :    USE parallel_rng_types,              ONLY: UNIFORM,&
      82             :                                               rng_stream_type
      83             :    USE pw_grid_types,                   ONLY: FULLSPACE,&
      84             :                                               HALFSPACE,&
      85             :                                               pw_grid_type
      86             :    USE pw_grids,                        ONLY: pw_grid_create,&
      87             :                                               pw_grid_release
      88             :    USE pw_methods,                      ONLY: pw_transfer,&
      89             :                                               pw_zero
      90             :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      91             :                                               pw_c3d_rs_type,&
      92             :                                               pw_r3d_rs_type
      93             :    USE realspace_grid_types,            ONLY: &
      94             :         realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs_grid_create, &
      95             :         rs_grid_create_descriptor, rs_grid_print, rs_grid_release, rs_grid_release_descriptor, &
      96             :         rs_grid_zero, transfer_pw2rs, transfer_rs2pw
      97             :    USE shg_integrals_test,              ONLY: shg_integrals_perf_acc_test
      98             : #include "./base/base_uses.f90"
      99             : 
     100             :    IMPLICIT NONE
     101             : 
     102             :    PRIVATE
     103             :    PUBLIC :: lib_test
     104             : 
     105             :    INTEGER                  :: runtest(100)
     106             :    REAL(KIND=dp)           :: max_memory
     107             :    REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp
     108             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
     109             : 
     110             : CONTAINS
     111             : 
     112             : ! **************************************************************************************************
     113             : !> \brief Master routine for tests
     114             : !> \param root_section ...
     115             : !> \param para_env ...
     116             : !> \param globenv ...
     117             : !> \par History
     118             : !>      none
     119             : !> \author JGH  6-NOV-2000
     120             : ! **************************************************************************************************
     121         720 :    SUBROUTINE lib_test(root_section, para_env, globenv)
     122             : 
     123             :       TYPE(section_vals_type), POINTER                   :: root_section
     124             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     125             :       TYPE(global_environment_type), POINTER             :: globenv
     126             : 
     127             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'lib_test'
     128             : 
     129             :       INTEGER                                            :: handle, iw
     130             :       LOGICAL                                            :: explicit
     131             :       TYPE(cp_logger_type), POINTER                      :: logger
     132             :       TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
     133             :          dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
     134             :          rs_pw_transfer_section, shg_integrals_test_section
     135             : 
     136          80 :       CALL timeset(routineN, handle)
     137             : 
     138          80 :       logger => cp_get_default_logger()
     139          80 :       iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
     140             : 
     141          80 :       IF (iw > 0) THEN
     142          40 :          WRITE (iw, '(T2,79("*"))')
     143          40 :          WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
     144          40 :          WRITE (iw, '(T2,79("*"))')
     145             :       END IF
     146             :       !
     147          80 :       CALL test_input(root_section, para_env)
     148             :       !
     149          80 :       IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
     150             :       !
     151          80 :       IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.TRUE., test_dgemm=.FALSE., iw=iw)
     152          80 :       IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.FALSE., test_dgemm=.TRUE., iw=iw)
     153             :       !
     154          80 :       IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
     155           2 :                                          globenv%fftw_wisdom_file_name)
     156             :       !
     157          80 :       IF (runtest(4) /= 0) CALL eri_test(iw)
     158             :       !
     159          80 :       IF (runtest(6) /= 0) CALL clebsch_gordon_test()
     160             :       !
     161             :       ! runtest 7 has been deleted and can be recycled
     162             :       !
     163          80 :       IF (runtest(8) /= 0) CALL mpi_perf_test(para_env, runtest(8), iw)
     164             :       !
     165          80 :       IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
     166             :       !
     167          80 :       IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
     168             :       !
     169             : 
     170          80 :       rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
     171          80 :       CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
     172          80 :       IF (explicit) THEN
     173           2 :          CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
     174             :       END IF
     175             : 
     176          80 :       pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
     177          80 :       CALL section_vals_get(pw_transfer_section, explicit=explicit)
     178          80 :       IF (explicit) THEN
     179          10 :          CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
     180             :       END IF
     181             : 
     182          80 :       cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
     183          80 :       CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
     184          80 :       IF (explicit) THEN
     185           4 :          CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
     186             :       END IF
     187             : 
     188          80 :       eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
     189          80 :       CALL section_vals_get(eigensolver_section, explicit=explicit)
     190          80 :       IF (explicit) THEN
     191           2 :          CALL eigensolver_test(para_env, iw, eigensolver_section)
     192             :       END IF
     193             : 
     194          80 :       eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
     195          80 :       CALL section_vals_get(eri_mme_test_section, explicit=explicit)
     196          80 :       IF (explicit) THEN
     197           8 :          CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
     198             :       END IF
     199             : 
     200          80 :       shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
     201          80 :       CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
     202          80 :       IF (explicit) THEN
     203           4 :          CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
     204             :       END IF
     205             : 
     206             :       ! DBCSR tests
     207          80 :       cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_DBCSR")
     208          80 :       CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
     209          80 :       IF (explicit) THEN
     210          32 :          CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
     211             :       END IF
     212             : 
     213             :       ! DBM tests
     214          80 :       dbm_test_section => section_vals_get_subs_vals(root_section, "TEST%DBM")
     215          80 :       CALL section_vals_get(dbm_test_section, explicit=explicit)
     216          80 :       IF (explicit) THEN
     217          14 :          CALL run_dbm_tests(para_env, iw, dbm_test_section)
     218             :       END IF
     219             : 
     220          80 :       CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
     221             : 
     222          80 :       CALL timestop(handle)
     223             : 
     224          80 :    END SUBROUTINE lib_test
     225             : 
     226             : ! **************************************************************************************************
     227             : !> \brief Reads input section &TEST ... &END
     228             : !> \param root_section ...
     229             : !> \param para_env ...
     230             : !> \author JGH 30-NOV-2000
     231             : !> \note
     232             : !> I---------------------------------------------------------------------------I
     233             : !> I SECTION: &TEST ... &END                                                   I
     234             : !> I                                                                           I
     235             : !> I    MEMORY   max_memory                                                    I
     236             : !> I    COPY     n                                                             I
     237             : !> I    MATMUL   n                                                             I
     238             : !> I    FFT      n                                                             I
     239             : !> I    ERI      n                                                             I
     240             : !> I    PW_FFT   n                                                             I
     241             : !> I    Clebsch-Gordon n                                                       I
     242             : !> I    RS_GRIDS n                                                             I
     243             : !> I    MPI      n                                                             I
     244             : !> I    RNG      n             -> Parallel random number generator             I
     245             : !> I---------------------------------------------------------------------------I
     246             : ! **************************************************************************************************
     247         160 :    SUBROUTINE test_input(root_section, para_env)
     248             :       TYPE(section_vals_type), POINTER                   :: root_section
     249             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     250             : 
     251             :       TYPE(section_vals_type), POINTER                   :: test_section
     252             : 
     253             : !
     254             : !..defaults
     255             : ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
     256             : 
     257          80 :       runtest = 0
     258          80 :       test_section => section_vals_get_subs_vals(root_section, "TEST")
     259          80 :       CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
     260          80 :       CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
     261          80 :       CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
     262          80 :       CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
     263          80 :       CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
     264          80 :       CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
     265          80 :       CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
     266          80 :       CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
     267          80 :       CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
     268          80 :       CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
     269             : 
     270          80 :       CALL para_env%sync()
     271          80 :    END SUBROUTINE test_input
     272             : 
     273             : ! **************************************************************************************************
     274             : !> \brief Tests the performance to copy two vectors.
     275             : !> \param para_env ...
     276             : !> \param iw ...
     277             : !> \par History
     278             : !>      none
     279             : !> \author JGH  6-NOV-2000
     280             : !> \note
     281             : !>      The results of these tests allow to determine the size of the cache
     282             : !>      of the CPU. This can be used to optimize the performance of the
     283             : !>      FFTSG library.
     284             : ! **************************************************************************************************
     285           2 :    SUBROUTINE copy_test(para_env, iw)
     286             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     287             :       INTEGER                                            :: iw
     288             : 
     289             :       INTEGER                                            :: i, ierr, j, len, ntim, siz
     290             :       REAL(KIND=dp)                                      :: perf, t, tend, tstart
     291           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: ca, cb
     292             : 
     293             : ! test for copy --> Cache size
     294             : 
     295           2 :       siz = ABS(runtest(1))
     296           2 :       IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
     297          34 :       DO i = 6, 24
     298          34 :          len = 2**i
     299          34 :          IF (8.0_dp*REAL(len, KIND=dp) > max_memory*0.5_dp) EXIT
     300          96 :          ALLOCATE (ca(len), STAT=ierr)
     301          32 :          IF (ierr /= 0) EXIT
     302          64 :          ALLOCATE (cb(len), STAT=ierr)
     303          32 :          IF (ierr /= 0) EXIT
     304             : 
     305          32 :          CALL RANDOM_NUMBER(ca)
     306          32 :          ntim = NINT(1.e7_dp/REAL(len, KIND=dp))
     307          32 :          ntim = MAX(ntim, 1)
     308          32 :          ntim = MIN(ntim, siz*10000)
     309             : 
     310          32 :          tstart = m_walltime()
     311      512524 :          DO j = 1, ntim
     312   315058412 :             cb(:) = ca(:)
     313      512524 :             ca(1) = REAL(j, KIND=dp)
     314             :          END DO
     315          32 :          tend = m_walltime()
     316          32 :          t = tend - tstart + threshold
     317          32 :          IF (t > 0.0_dp) THEN
     318          32 :             perf = REAL(ntim, KIND=dp)*REAL(len, KIND=dp)*1.e-6_dp/t
     319             :          ELSE
     320           0 :             perf = 0.0_dp
     321             :          END IF
     322             : 
     323          32 :          IF (para_env%is_source()) THEN
     324          16 :             WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test:   Size = 2^", i, &
     325          32 :                len/1024, " Kwords", perf, " Mcopy/s"
     326             :          END IF
     327             : 
     328          32 :          DEALLOCATE (ca)
     329          34 :          DEALLOCATE (cb)
     330             :       END DO
     331           2 :       CALL para_env%sync()
     332           2 :    END SUBROUTINE copy_test
     333             : 
     334             : ! **************************************************************************************************
     335             : !> \brief Tests the performance of different kinds of matrix matrix multiply
     336             : !>      kernels for the BLAS and F95 intrinsic matmul.
     337             : !> \param para_env ...
     338             : !> \param test_matmul ...
     339             : !> \param test_dgemm ...
     340             : !> \param iw ...
     341             : !> \par History
     342             : !>      none
     343             : !> \author JGH  6-NOV-2000
     344             : ! **************************************************************************************************
     345           2 :    SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
     346             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     347             :       LOGICAL                                            :: test_matmul, test_dgemm
     348             :       INTEGER                                            :: iw
     349             : 
     350             :       INTEGER                                            :: i, ierr, j, len, ntim, siz
     351             :       REAL(KIND=dp)                                      :: perf, t, tend, tstart, xdum
     352           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ma, mb, mc
     353             : 
     354             : ! test for matrix multpies
     355             : 
     356           2 :       IF (test_matmul) THEN
     357           2 :          siz = ABS(runtest(2))
     358           2 :          IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
     359           2 :          DO i = 5, siz, 2
     360           4 :             len = 2**i + 1
     361           4 :             IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
     362          16 :             ALLOCATE (ma(len, len), STAT=ierr)
     363           4 :             IF (ierr /= 0) EXIT
     364          12 :             ALLOCATE (mb(len, len), STAT=ierr)
     365           4 :             IF (ierr /= 0) EXIT
     366          12 :             ALLOCATE (mc(len, len), STAT=ierr)
     367           4 :             IF (ierr /= 0) EXIT
     368       35788 :             mc = 0.0_dp
     369             : 
     370           4 :             CALL RANDOM_NUMBER(xdum)
     371       35788 :             ma = xdum
     372           4 :             CALL RANDOM_NUMBER(xdum)
     373       35788 :             mb = xdum
     374           4 :             ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
     375           4 :             ntim = MAX(ntim, 1)
     376           4 :             ntim = MIN(ntim, siz*200)
     377           4 :             tstart = m_walltime()
     378        2832 :             DO j = 1, ntim
     379        2828 :                mc(:, :) = MATMUL(ma, mb)
     380        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     381             :             END DO
     382           4 :             tend = m_walltime()
     383           4 :             t = tend - tstart + threshold
     384           4 :             perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     385           4 :             IF (para_env%is_source()) THEN
     386             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     387           2 :                   " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
     388             :             END IF
     389           4 :             tstart = m_walltime()
     390        2832 :             DO j = 1, ntim
     391     3895652 :                mc(:, :) = mc + MATMUL(ma, mb)
     392        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     393             :             END DO
     394           4 :             tend = m_walltime()
     395           4 :             t = tend - tstart + threshold
     396           4 :             IF (t > 0.0_dp) THEN
     397           4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     398             :             ELSE
     399           0 :                perf = 0.0_dp
     400             :             END IF
     401             : 
     402           4 :             IF (para_env%is_source()) THEN
     403             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     404           2 :                   " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
     405             :             END IF
     406             : 
     407           4 :             tstart = m_walltime()
     408        2832 :             DO j = 1, ntim
     409     3895652 :                mc(:, :) = mc + MATMUL(ma, TRANSPOSE(mb))
     410        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     411             :             END DO
     412           4 :             tend = m_walltime()
     413           4 :             t = tend - tstart + threshold
     414           4 :             IF (t > 0.0_dp) THEN
     415           4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     416             :             ELSE
     417           0 :                perf = 0.0_dp
     418             :             END IF
     419             : 
     420           4 :             IF (para_env%is_source()) THEN
     421             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     422           2 :                   " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
     423             :             END IF
     424             : 
     425           4 :             tstart = m_walltime()
     426        2832 :             DO j = 1, ntim
     427     3895652 :                mc(:, :) = mc + MATMUL(TRANSPOSE(ma), mb)
     428        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     429             :             END DO
     430           4 :             tend = m_walltime()
     431           4 :             t = tend - tstart + threshold
     432           4 :             IF (t > 0.0_dp) THEN
     433           4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     434             :             ELSE
     435           0 :                perf = 0.0_dp
     436             :             END IF
     437             : 
     438           4 :             IF (para_env%is_source()) THEN
     439             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     440           2 :                   " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
     441             :             END IF
     442             : 
     443           4 :             DEALLOCATE (ma)
     444           4 :             DEALLOCATE (mb)
     445           4 :             DEALLOCATE (mc)
     446             :          END DO
     447             :       END IF
     448             : 
     449             :       ! test for matrix multpies
     450           2 :       IF (test_dgemm) THEN
     451           0 :          siz = ABS(runtest(5))
     452           0 :          IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
     453           0 :          DO i = 5, siz, 2
     454           0 :             len = 2**i + 1
     455           0 :             IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
     456           0 :             ALLOCATE (ma(len, len), STAT=ierr)
     457           0 :             IF (ierr /= 0) EXIT
     458           0 :             ALLOCATE (mb(len, len), STAT=ierr)
     459           0 :             IF (ierr /= 0) EXIT
     460           0 :             ALLOCATE (mc(len, len), STAT=ierr)
     461           0 :             IF (ierr /= 0) EXIT
     462           0 :             mc = 0.0_dp
     463             : 
     464           0 :             CALL RANDOM_NUMBER(xdum)
     465           0 :             ma = xdum
     466           0 :             CALL RANDOM_NUMBER(xdum)
     467           0 :             mb = xdum
     468           0 :             ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
     469           0 :             ntim = MAX(ntim, 1)
     470           0 :             ntim = MIN(ntim, 1000)
     471             : 
     472           0 :             tstart = m_walltime()
     473           0 :             DO j = 1, ntim
     474           0 :                CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     475             :             END DO
     476           0 :             tend = m_walltime()
     477           0 :             t = tend - tstart + threshold
     478           0 :             IF (t > 0.0_dp) THEN
     479           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     480             :             ELSE
     481           0 :                perf = 0.0_dp
     482             :             END IF
     483             : 
     484           0 :             IF (para_env%is_source()) THEN
     485             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     486           0 :                   " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
     487             :             END IF
     488             : 
     489           0 :             tstart = m_walltime()
     490           0 :             DO j = 1, ntim
     491           0 :                CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     492             :             END DO
     493           0 :             tend = m_walltime()
     494           0 :             t = tend - tstart + threshold
     495           0 :             IF (t > 0.0_dp) THEN
     496           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     497             :             ELSE
     498           0 :                perf = 0.0_dp
     499             :             END IF
     500             : 
     501           0 :             IF (para_env%is_source()) THEN
     502             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     503           0 :                   " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
     504             :             END IF
     505             : 
     506           0 :             tstart = m_walltime()
     507           0 :             DO j = 1, ntim
     508           0 :                CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     509             :             END DO
     510           0 :             tend = m_walltime()
     511           0 :             t = tend - tstart + threshold
     512           0 :             IF (t > 0.0_dp) THEN
     513           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     514             :             ELSE
     515           0 :                perf = 0.0_dp
     516             :             END IF
     517             : 
     518           0 :             IF (para_env%is_source()) THEN
     519             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     520           0 :                   " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
     521             :             END IF
     522             : 
     523           0 :             tstart = m_walltime()
     524           0 :             DO j = 1, ntim
     525           0 :                CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     526             :             END DO
     527           0 :             tend = m_walltime()
     528           0 :             t = tend - tstart + threshold
     529           0 :             IF (t > 0.0_dp) THEN
     530           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     531             :             ELSE
     532           0 :                perf = 0.0_dp
     533             :             END IF
     534             : 
     535           0 :             IF (para_env%is_source()) THEN
     536             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     537           0 :                   " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
     538             :             END IF
     539             : 
     540           0 :             DEALLOCATE (ma)
     541           0 :             DEALLOCATE (mb)
     542           0 :             DEALLOCATE (mc)
     543             :          END DO
     544             :       END IF
     545             : 
     546           2 :       CALL para_env%sync()
     547             : 
     548           2 :    END SUBROUTINE matmul_test
     549             : 
     550             : ! **************************************************************************************************
     551             : !> \brief Tests the performance of all available FFT libraries for 3D FFTs
     552             : !> \param para_env ...
     553             : !> \param iw ...
     554             : !> \param fftw_plan_type ...
     555             : !> \param wisdom_file where FFTW3 should look to save/load wisdom
     556             : !> \par History
     557             : !>      none
     558             : !> \author JGH  6-NOV-2000
     559             : ! **************************************************************************************************
     560           2 :    SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
     561             : 
     562             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     563             :       INTEGER                                            :: iw, fftw_plan_type
     564             :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     565             : 
     566             :       INTEGER, PARAMETER                                 :: ndate(3) = (/12, 48, 96/)
     567             : 
     568             :       INTEGER                                            :: iall, ierr, it, j, len, n(3), ntim, &
     569             :                                                             radix_in, radix_out, siz, stat
     570             :       COMPLEX(KIND=dp), DIMENSION(4, 4, 4)               :: zz
     571           2 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: ca, cb, cc
     572             :       CHARACTER(LEN=7)                                   :: method
     573             :       REAL(KIND=dp)                                      :: flops, perf, scale, t, tdiff, tend, &
     574             :                                                             tstart
     575           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: ra
     576             : 
     577             : ! test for 3d FFT
     578             : 
     579           2 :       IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
     580           2 :       siz = ABS(runtest(3))
     581             : 
     582           8 :       DO iall = 1, 100
     583             :          SELECT CASE (iall)
     584             :          CASE DEFAULT
     585           2 :             EXIT
     586             :          CASE (1)
     587             :             CALL init_fft("FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
     588           2 :                           pool_limit=10, plan_style=fftw_plan_type)
     589           2 :             method = "FFTSG  "
     590             :          CASE (2)
     591           2 :             CYCLE
     592             :          CASE (3)
     593             :             CALL init_fft("FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
     594           2 :                           pool_limit=10, plan_style=fftw_plan_type)
     595          10 :             method = "FFTW3  "
     596             :          END SELECT
     597          16 :          n = 4
     598           4 :          zz = 0.0_dp
     599           4 :          CALL fft3d(FWFFT, n, zz, status=stat)
     600           4 :          IF (stat == 0) THEN
     601          16 :             DO it = 1, 3
     602          12 :                radix_in = ndate(it)
     603          12 :                CALL fft_radix_operations(radix_in, radix_out, FFT_RADIX_CLOSEST)
     604          12 :                len = radix_out
     605          48 :                n = len
     606          12 :                IF (16.0_dp*REAL(len*len*len, KIND=dp) > max_memory*0.5_dp) EXIT
     607          60 :                ALLOCATE (ra(len, len, len), STAT=ierr)
     608          60 :                ALLOCATE (ca(len, len, len), STAT=ierr)
     609          12 :                CALL RANDOM_NUMBER(ra)
     610     4035516 :                ca(:, :, :) = ra
     611          12 :                CALL RANDOM_NUMBER(ra)
     612     4035516 :                ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
     613          12 :                flops = REAL(len**3, KIND=dp)*15.0_dp*LOG(REAL(len, KIND=dp))
     614          12 :                ntim = NINT(siz*1.e7_dp/flops)
     615          12 :                ntim = MAX(ntim, 1)
     616          12 :                ntim = MIN(ntim, 200)
     617          12 :                scale = 1.0_dp/REAL(len**3, KIND=dp)
     618          12 :                tstart = m_walltime()
     619         884 :                DO j = 1, ntim
     620         872 :                   CALL fft3d(FWFFT, n, ca)
     621         884 :                   CALL fft3d(BWFFT, n, ca)
     622             :                END DO
     623          12 :                tend = m_walltime()
     624          12 :                t = tend - tstart + threshold
     625          12 :                IF (t > 0.0_dp) THEN
     626          12 :                   perf = REAL(ntim, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
     627             :                ELSE
     628           0 :                   perf = 0.0_dp
     629             :                END IF
     630             : 
     631          12 :                IF (para_env%is_source()) THEN
     632             :                   WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
     633           6 :                      ADJUSTR(method), " test (in-place)    Size = ", len, perf, " Mflop/s"
     634             :                END IF
     635          12 :                DEALLOCATE (ca)
     636          16 :                DEALLOCATE (ra)
     637             :             END DO
     638           4 :             IF (para_env%is_source()) WRITE (iw, *)
     639             :             ! test if input data is preserved
     640           4 :             len = 24
     641          16 :             n = len
     642           4 :             ALLOCATE (ra(len, len, len))
     643           4 :             ALLOCATE (ca(len, len, len))
     644           4 :             ALLOCATE (cb(len, len, len))
     645           4 :             ALLOCATE (cc(len, len, len))
     646           4 :             CALL RANDOM_NUMBER(ra)
     647       57700 :             ca(:, :, :) = ra
     648           4 :             CALL RANDOM_NUMBER(ra)
     649       57700 :             ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
     650       57700 :             cc(:, :, :) = ca
     651           4 :             CALL fft3d(FWFFT, n, ca, cb)
     652       57700 :             tdiff = MAXVAL(ABS(ca - cc))
     653           4 :             IF (tdiff > 1.0E-12_dp) THEN
     654           0 :                IF (para_env%is_source()) &
     655           0 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
     656           0 :                   "             Input array is changed in out-of-place FFT !"
     657             :             ELSE
     658           4 :                IF (para_env%is_source()) &
     659           2 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
     660           4 :                   "         Input array is not changed in out-of-place FFT !"
     661             :             END IF
     662       57700 :             ca(:, :, :) = cc
     663           4 :             CALL fft3d(BWFFT, n, ca, cb)
     664       57700 :             tdiff = MAXVAL(ABS(ca - cc))
     665           4 :             IF (tdiff > 1.0E-12_dp) THEN
     666           0 :                IF (para_env%is_source()) &
     667           0 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
     668           0 :                   "             Input array is changed in out-of-place FFT !"
     669             :             ELSE
     670           4 :                IF (para_env%is_source()) &
     671           2 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
     672           4 :                   "         Input array is not changed in out-of-place FFT !"
     673             :             END IF
     674           4 :             IF (para_env%is_source()) WRITE (iw, *)
     675             : 
     676           4 :             DEALLOCATE (ra)
     677           4 :             DEALLOCATE (ca)
     678           4 :             DEALLOCATE (cb)
     679           4 :             DEALLOCATE (cc)
     680             :          END IF
     681           4 :          CALL finalize_fft(para_env, wisdom_file=wisdom_file)
     682             :       END DO
     683             : 
     684           2 :    END SUBROUTINE fft_test
     685             : 
     686             : ! **************************************************************************************************
     687             : !> \brief   test rs_pw_transfer performance
     688             : !> \param para_env ...
     689             : !> \param iw ...
     690             : !> \param globenv ...
     691             : !> \param rs_pw_transfer_section ...
     692             : !> \author  Joost VandeVondele
     693             : !>      9.2008 Randomise rs grid [Iain Bethune]
     694             : !>      (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
     695             : ! **************************************************************************************************
     696           2 :    SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
     697             : 
     698             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     699             :       INTEGER                                            :: iw
     700             :       TYPE(global_environment_type), POINTER             :: globenv
     701             :       TYPE(section_vals_type), POINTER                   :: rs_pw_transfer_section
     702             : 
     703             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test'
     704             : 
     705             :       INTEGER                                            :: halo_size, handle, i_loop, n_loop, ns_max
     706             :       INTEGER, DIMENSION(3)                              :: no, np
     707           2 :       INTEGER, DIMENSION(:), POINTER                     :: i_vals
     708             :       LOGICAL                                            :: do_rs2pw
     709             :       REAL(KIND=dp)                                      :: tend, tstart
     710             :       TYPE(cell_type), POINTER                           :: box
     711             :       TYPE(pw_grid_type), POINTER                        :: grid
     712             :       TYPE(pw_r3d_rs_type)                               :: ca
     713             :       TYPE(realspace_grid_desc_type), POINTER            :: rs_desc
     714             :       TYPE(realspace_grid_input_type)                    :: input_settings
     715          32 :       TYPE(realspace_grid_type)                          :: rs_grid
     716             :       TYPE(section_vals_type), POINTER                   :: rs_grid_section
     717             : 
     718           2 :       CALL timeset(routineN, handle)
     719             : 
     720             :       !..set fft lib
     721             :       CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
     722             :                     pool_limit=globenv%fft_pool_scratch_limit, &
     723             :                     wisdom_file=globenv%fftw_wisdom_file_name, &
     724           2 :                     plan_style=globenv%fftw_plan_type)
     725             : 
     726             :       ! .. set cell (should otherwise be irrelevant)
     727           2 :       NULLIFY (box)
     728           2 :       CALL cell_create(box)
     729             :       box%hmat = RESHAPE((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
     730          26 :                            0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
     731           2 :       CALL init_cell(box)
     732             : 
     733             :       ! .. grid type and pw_grid
     734           2 :       NULLIFY (grid)
     735           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
     736           8 :       np = i_vals
     737           2 :       CALL pw_grid_create(grid, para_env, box%hmat, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw)
     738           8 :       no = grid%npts
     739             : 
     740           2 :       CALL ca%create(grid)
     741           2 :       CALL pw_zero(ca)
     742             : 
     743             :       ! .. rs input setting type
     744           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
     745           2 :       rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
     746           2 :       ns_max = 2*halo_size + 1
     747           2 :       CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
     748             : 
     749             :       ! .. rs type
     750           2 :       NULLIFY (rs_desc)
     751           2 :       CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
     752           2 :       CALL rs_grid_create(rs_grid, rs_desc)
     753           2 :       CALL rs_grid_print(rs_grid, iw)
     754           2 :       CALL rs_grid_zero(rs_grid)
     755             : 
     756             :       ! Put random values on the grid, so summation check will pick up errors
     757           2 :       CALL RANDOM_NUMBER(rs_grid%r)
     758             : 
     759           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=N_loop)
     760           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
     761             : 
     762             :       ! go for the real loops, sync to get max timings
     763           2 :       IF (para_env%is_source()) THEN
     764           1 :          WRITE (iw, '(T2,A)') ""
     765           1 :          WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
     766           1 :          WRITE (iw, '(T2,A)') ""
     767           1 :          WRITE (iw, '(T2,A)') "iteration      time[s]"
     768             :       END IF
     769           8 :       DO i_loop = 1, N_loop
     770           6 :          CALL para_env%sync()
     771           6 :          tstart = m_walltime()
     772           6 :          IF (do_rs2pw) THEN
     773           6 :             CALL transfer_rs2pw(rs_grid, ca)
     774             :          ELSE
     775           0 :             CALL transfer_pw2rs(rs_grid, ca)
     776             :          END IF
     777           6 :          CALL para_env%sync()
     778           6 :          tend = m_walltime()
     779           8 :          IF (para_env%is_source()) THEN
     780           3 :             WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
     781             :          END IF
     782             :       END DO
     783             : 
     784             :       !cleanup
     785           2 :       CALL rs_grid_release(rs_grid)
     786           2 :       CALL rs_grid_release_descriptor(rs_desc)
     787           2 :       CALL ca%release()
     788           2 :       CALL pw_grid_release(grid)
     789           2 :       CALL cell_release(box)
     790           2 :       CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
     791             : 
     792           2 :       CALL timestop(handle)
     793             : 
     794          10 :    END SUBROUTINE rs_pw_transfer_test
     795             : 
     796             : ! **************************************************************************************************
     797             : !> \brief Tests the performance of PW calls to FFT routines
     798             : !> \param para_env ...
     799             : !> \param iw ...
     800             : !> \param globenv ...
     801             : !> \param pw_transfer_section ...
     802             : !> \par History
     803             : !>      JGH  6-Feb-2001 : Test and performance code
     804             : !>      Made input sensitive [Joost VandeVondele]
     805             : !> \author JGH  1-JAN-2001
     806             : ! **************************************************************************************************
     807          10 :    SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
     808             : 
     809             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     810             :       INTEGER                                            :: iw
     811             :       TYPE(global_environment_type), POINTER             :: globenv
     812             :       TYPE(section_vals_type), POINTER                   :: pw_transfer_section
     813             : 
     814             :       REAL(KIND=dp), PARAMETER                           :: toler = 1.e-11_dp
     815             : 
     816             :       INTEGER                                            :: blocked_id, grid_span, i_layout, i_rep, &
     817             :                                                             ig, ip, itmp, n_loop, n_rep, nn, p, q
     818          10 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: layouts
     819             :       INTEGER, DIMENSION(2)                              :: distribution_layout
     820             :       INTEGER, DIMENSION(3)                              :: no, np
     821          10 :       INTEGER, DIMENSION(:), POINTER                     :: i_vals
     822             :       LOGICAL                                            :: debug, is_fullspace, odd, &
     823             :                                                             pw_grid_layout_all, spherical
     824             :       REAL(KIND=dp)                                      :: em, et, flops, gsq, perf, t, t_max, &
     825             :                                                             t_min, tend, tstart
     826          10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: t_end, t_start
     827             :       TYPE(cell_type), POINTER                           :: box
     828             :       TYPE(pw_c1d_gs_type)                               :: ca, cc
     829             :       TYPE(pw_c3d_rs_type)                               :: cb
     830             :       TYPE(pw_grid_type), POINTER                        :: grid
     831             : 
     832             : !..set fft lib
     833             : 
     834             :       CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
     835             :                     pool_limit=globenv%fft_pool_scratch_limit, &
     836             :                     wisdom_file=globenv%fftw_wisdom_file_name, &
     837          10 :                     plan_style=globenv%fftw_plan_type)
     838             : 
     839             :       !..the unit cell (should not really matter, the number of grid points do)
     840          10 :       NULLIFY (box, grid)
     841          10 :       CALL cell_create(box)
     842             :       box%hmat = RESHAPE((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
     843         130 :                            0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
     844          10 :       CALL init_cell(box)
     845             : 
     846          10 :       CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
     847          70 :       DO i_rep = 1, n_rep
     848             : 
     849             :          ! how often should we do the transfer
     850          60 :          CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
     851         180 :          ALLOCATE (t_start(N_loop))
     852         120 :          ALLOCATE (t_end(N_loop))
     853             : 
     854             :          ! setup of the grids
     855          60 :          CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
     856         240 :          np = i_vals
     857             : 
     858          60 :          CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
     859          60 :          CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
     860             : 
     861             :          CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
     862          60 :                                    l_val=pw_grid_layout_all)
     863             : 
     864             :          ! prepare to loop over all or a specific layout
     865          60 :          IF (pw_grid_layout_all) THEN
     866             :             ! count layouts that fit
     867           8 :             itmp = 0
     868             :             ! start from 2, (/1,para_env%num_pe/) is not supported
     869          16 :             DO p = 2, para_env%num_pe
     870           8 :                q = para_env%num_pe/p
     871          16 :                IF (p*q == para_env%num_pe) THEN
     872           8 :                   itmp = itmp + 1
     873             :                END IF
     874             :             END DO
     875             :             ! build list
     876          24 :             ALLOCATE (layouts(2, itmp))
     877           8 :             itmp = 0
     878          16 :             DO p = 2, para_env%num_pe
     879           8 :                q = para_env%num_pe/p
     880          16 :                IF (p*q == para_env%num_pe) THEN
     881           8 :                   itmp = itmp + 1
     882          24 :                   layouts(:, itmp) = (/p, q/)
     883             :                END IF
     884             :             END DO
     885             :          ELSE
     886          52 :             CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
     887          52 :             ALLOCATE (layouts(2, 1))
     888         156 :             layouts(:, 1) = i_vals
     889             :          END IF
     890             : 
     891         120 :          DO i_layout = 1, SIZE(layouts, 2)
     892             : 
     893         180 :             distribution_layout = layouts(:, i_layout)
     894             : 
     895          60 :             CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
     896             : 
     897             :             ! from cp_control_utils
     898          16 :             SELECT CASE (itmp)
     899             :             CASE (do_pwgrid_spherical)
     900          16 :                spherical = .TRUE.
     901          16 :                is_fullspace = .FALSE.
     902             :             CASE (do_pwgrid_ns_fullspace)
     903          28 :                spherical = .FALSE.
     904          28 :                is_fullspace = .TRUE.
     905             :             CASE (do_pwgrid_ns_halfspace)
     906          16 :                spherical = .FALSE.
     907          60 :                is_fullspace = .FALSE.
     908             :             END SELECT
     909             : 
     910             :             ! from pw_env_methods
     911          60 :             IF (spherical) THEN
     912          16 :                grid_span = HALFSPACE
     913          16 :                spherical = .TRUE.
     914          16 :                odd = .TRUE.
     915          44 :             ELSE IF (is_fullspace) THEN
     916          28 :                grid_span = FULLSPACE
     917          28 :                spherical = .FALSE.
     918          28 :                odd = .FALSE.
     919             :             ELSE
     920          16 :                grid_span = HALFSPACE
     921          16 :                spherical = .FALSE.
     922          16 :                odd = .TRUE.
     923             :             END IF
     924             : 
     925             :             ! actual setup
     926             :             CALL pw_grid_create(grid, para_env, box%hmat, grid_span=grid_span, odd=odd, spherical=spherical, &
     927             :                                 blocked=blocked_id, npts=np, fft_usage=.TRUE., &
     928          60 :                                 rs_dims=distribution_layout, iounit=iw)
     929             : 
     930          60 :             IF (iw > 0) CALL m_flush(iw)
     931             : 
     932             :             ! note that the number of grid points might be different from what the user requested (fft-able needed)
     933         240 :             no = grid%npts
     934             : 
     935          60 :             CALL ca%create(grid)
     936          60 :             CALL cb%create(grid)
     937          60 :             CALL cc%create(grid)
     938             : 
     939             :             ! initialize data
     940          60 :             CALL pw_zero(ca)
     941          60 :             CALL pw_zero(cb)
     942          60 :             CALL pw_zero(cc)
     943          60 :             nn = SIZE(ca%array)
     944      142042 :             DO ig = 1, nn
     945      141982 :                gsq = grid%gsq(ig)
     946      142042 :                ca%array(ig) = EXP(-gsq)
     947             :             END DO
     948             : 
     949         420 :             flops = PRODUCT(no)*30.0_dp*LOG(REAL(MAXVAL(no), KIND=dp))
     950          60 :             tstart = m_walltime()
     951         596 :             DO ip = 1, n_loop
     952         536 :                CALL para_env%sync()
     953         536 :                t_start(ip) = m_walltime()
     954         536 :                CALL pw_transfer(ca, cb, debug)
     955         536 :                CALL pw_transfer(cb, cc, debug)
     956         536 :                CALL para_env%sync()
     957         596 :                t_end(ip) = m_walltime()
     958             :             END DO
     959          60 :             tend = m_walltime()
     960          60 :             t = tend - tstart + threshold
     961          60 :             IF (t > 0.0_dp) THEN
     962          60 :                perf = REAL(n_loop, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
     963             :             ELSE
     964           0 :                perf = 0.0_dp
     965             :             END IF
     966             : 
     967      142102 :             em = MAXVAL(ABS(ca%array(:) - cc%array(:)))
     968          60 :             CALL para_env%max(em)
     969      142042 :             et = SUM(ABS(ca%array(:) - cc%array(:)))
     970          60 :             CALL para_env%sum(et)
     971         656 :             t_min = MINVAL(t_end - t_start)
     972         656 :             t_max = MAXVAL(t_end - t_start)
     973             : 
     974          60 :             IF (para_env%is_source()) THEN
     975          30 :                WRITE (iw, *)
     976          30 :                WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
     977          30 :                WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
     978             :                WRITE (iw, '(A,T67,F14.0)') &
     979          30 :                   " Parallel FFT Tests: Performance [Mflops] ", perf
     980          30 :                WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
     981          30 :                WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
     982          30 :                IF (iw > 0) CALL m_flush(iw)
     983             :             END IF
     984             : 
     985             :             ! need debugging ???
     986          60 :             IF (em > toler .OR. et > toler) THEN
     987           0 :                CPWARN("The FFT results are not accurate ... starting debug pw_transfer")
     988           0 :                CALL pw_transfer(ca, cb, .TRUE.)
     989           0 :                CALL pw_transfer(cb, cc, .TRUE.)
     990             :             END IF
     991             : 
     992             :             ! done with these grids
     993          60 :             CALL ca%release()
     994          60 :             CALL cb%release()
     995          60 :             CALL cc%release()
     996         180 :             CALL pw_grid_release(grid)
     997             : 
     998             :          END DO
     999             : 
    1000             :          ! local arrays
    1001          60 :          DEALLOCATE (layouts)
    1002          60 :          DEALLOCATE (t_start)
    1003         190 :          DEALLOCATE (t_end)
    1004             : 
    1005             :       END DO
    1006             : 
    1007             :       ! cleanup
    1008          10 :       CALL cell_release(box)
    1009          10 :       CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
    1010             : 
    1011          20 :    END SUBROUTINE pw_fft_test
    1012             : 
    1013             : ! **************************************************************************************************
    1014             : !> \brief Tests the eigensolver library routines
    1015             : !> \param para_env ...
    1016             : !> \param iw ...
    1017             : !> \param eigensolver_section ...
    1018             : !> \par History
    1019             : !>      JGH  6-Feb-2001 : Test and performance code
    1020             : !> \author JGH  1-JAN-2001
    1021             : ! **************************************************************************************************
    1022           2 :    SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
    1023             : 
    1024             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1025             :       INTEGER                                            :: iw
    1026             :       TYPE(section_vals_type), POINTER                   :: eigensolver_section
    1027             : 
    1028             :       INTEGER                                            :: diag_method, i, i_loop, i_rep, &
    1029             :                                                             init_method, j, n, n_loop, n_rep, &
    1030             :                                                             neig, unit_number
    1031             :       REAL(KIND=dp)                                      :: t1, t2
    1032           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues
    1033           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: buffer
    1034             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1035             :       TYPE(cp_fm_struct_type), POINTER                   :: fmstruct
    1036             :       TYPE(cp_fm_type)                                   :: eigenvectors, matrix, work
    1037           2 :       TYPE(rng_stream_type), ALLOCATABLE                 :: rng_stream
    1038             : 
    1039           2 :       IF (iw > 0) THEN
    1040           1 :          WRITE (UNIT=iw, FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
    1041             :       END IF
    1042             : 
    1043             :       ! create blacs env corresponding to para_env
    1044           2 :       NULLIFY (blacs_env)
    1045             :       CALL cp_blacs_env_create(blacs_env=blacs_env, &
    1046           2 :                                para_env=para_env)
    1047             : 
    1048             :       ! loop over all tests
    1049           2 :       CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
    1050          10 :       DO i_rep = 1, n_rep
    1051             : 
    1052             :          ! parse section
    1053           8 :          CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
    1054           8 :          CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
    1055           8 :          CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
    1056           8 :          CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
    1057           8 :          CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
    1058             : 
    1059             :          ! proper number of eigs
    1060           8 :          IF (neig < 0) neig = n
    1061           8 :          neig = MIN(neig, n)
    1062             : 
    1063             :          ! report
    1064           8 :          IF (iw > 0) THEN
    1065           4 :             WRITE (iw, *) "Matrix size", n
    1066           4 :             WRITE (iw, *) "Number of eigenvalues", neig
    1067           4 :             WRITE (iw, *) "Timing loops", n_loop
    1068           2 :             SELECT CASE (diag_method)
    1069             :             CASE (do_diag_syevd)
    1070           2 :                WRITE (iw, *) "Diag using syevd"
    1071             :             CASE (do_diag_syevx)
    1072           4 :                WRITE (iw, *) "Diag using syevx"
    1073             :             CASE DEFAULT
    1074             :                ! stop
    1075             :             END SELECT
    1076             : 
    1077           4 :             SELECT CASE (init_method)
    1078             :             CASE (do_mat_random)
    1079           4 :                WRITE (iw, *) "using random matrix"
    1080             :             CASE (do_mat_read)
    1081           4 :                WRITE (iw, *) "reading from file"
    1082             :             CASE DEFAULT
    1083             :                ! stop
    1084             :             END SELECT
    1085             :          END IF
    1086             : 
    1087             :          ! create matrix struct type
    1088           8 :          NULLIFY (fmstruct)
    1089             :          CALL cp_fm_struct_create(fmstruct=fmstruct, &
    1090             :                                   para_env=para_env, &
    1091             :                                   context=blacs_env, &
    1092             :                                   nrow_global=n, &
    1093           8 :                                   ncol_global=n)
    1094             : 
    1095             :          ! create all needed matrices, and buffers for the eigenvalues
    1096             :          CALL cp_fm_create(matrix=matrix, &
    1097             :                            matrix_struct=fmstruct, &
    1098           8 :                            name="MATRIX")
    1099           8 :          CALL cp_fm_set_all(matrix, 0.0_dp)
    1100             : 
    1101             :          CALL cp_fm_create(matrix=eigenvectors, &
    1102             :                            matrix_struct=fmstruct, &
    1103           8 :                            name="EIGENVECTORS")
    1104           8 :          CALL cp_fm_set_all(eigenvectors, 0.0_dp)
    1105             : 
    1106             :          CALL cp_fm_create(matrix=work, &
    1107             :                            matrix_struct=fmstruct, &
    1108           8 :                            name="WORK")
    1109           8 :          CALL cp_fm_set_all(matrix, 0.0_dp)
    1110             : 
    1111          24 :          ALLOCATE (eigenvalues(n))
    1112         100 :          eigenvalues = 0.0_dp
    1113          16 :          ALLOCATE (buffer(1, n))
    1114             : 
    1115             :          ! generate initial matrix, either by reading a file, or using random numbers
    1116           8 :          IF (para_env%is_source()) THEN
    1117           4 :             SELECT CASE (init_method)
    1118             :             CASE (do_mat_random)
    1119             :                rng_stream = rng_stream_type( &
    1120             :                             name="rng_stream", &
    1121             :                             distribution_type=UNIFORM, &
    1122           4 :                             extended_precision=.TRUE.)
    1123             :             CASE (do_mat_read)
    1124             :                CALL open_file(file_name="MATRIX", &
    1125             :                               file_action="READ", &
    1126             :                               file_form="FORMATTED", &
    1127             :                               file_status="OLD", &
    1128           4 :                               unit_number=unit_number)
    1129             :             END SELECT
    1130             :          END IF
    1131             : 
    1132         100 :          DO i = 1, n
    1133          92 :             IF (para_env%is_source()) THEN
    1134             :                SELECT CASE (init_method)
    1135             :                CASE (do_mat_random)
    1136         347 :                   DO j = i, n
    1137         347 :                      buffer(1, j) = rng_stream%next() - 0.5_dp
    1138             :                   END DO
    1139             :                   !MK activate/modify for a diagonal dominant symmetric matrix:
    1140             :                   !MK buffer(1,i) = 10.0_dp*buffer(1,i)
    1141             :                CASE (do_mat_read)
    1142          46 :                   READ (UNIT=unit_number, FMT=*) buffer(1, 1:n)
    1143             :                END SELECT
    1144             :             END IF
    1145          92 :             CALL para_env%bcast(buffer)
    1146           8 :             SELECT CASE (init_method)
    1147             :             CASE (do_mat_random)
    1148             :                CALL cp_fm_set_submatrix(fm=matrix, &
    1149             :                                         new_values=buffer, &
    1150             :                                         start_row=i, &
    1151             :                                         start_col=i, &
    1152             :                                         n_rows=1, &
    1153             :                                         n_cols=n - i + 1, &
    1154             :                                         alpha=1.0_dp, &
    1155             :                                         beta=0.0_dp, &
    1156          92 :                                         transpose=.FALSE.)
    1157             :                CALL cp_fm_set_submatrix(fm=matrix, &
    1158             :                                         new_values=buffer, &
    1159             :                                         start_row=i, &
    1160             :                                         start_col=i, &
    1161             :                                         n_rows=n - i + 1, &
    1162             :                                         n_cols=1, &
    1163             :                                         alpha=1.0_dp, &
    1164             :                                         beta=0.0_dp, &
    1165          92 :                                         transpose=.TRUE.)
    1166             :             CASE (do_mat_read)
    1167             :                CALL cp_fm_set_submatrix(fm=matrix, &
    1168             :                                         new_values=buffer, &
    1169             :                                         start_row=i, &
    1170             :                                         start_col=1, &
    1171             :                                         n_rows=1, &
    1172             :                                         n_cols=n, &
    1173             :                                         alpha=1.0_dp, &
    1174             :                                         beta=0.0_dp, &
    1175          92 :                                         transpose=.FALSE.)
    1176             :             END SELECT
    1177             :          END DO
    1178             : 
    1179           8 :          DEALLOCATE (buffer)
    1180             : 
    1181           8 :          IF (para_env%is_source()) THEN
    1182           0 :             SELECT CASE (init_method)
    1183             :             CASE (do_mat_read)
    1184           4 :                CALL close_file(unit_number=unit_number)
    1185             :             END SELECT
    1186             :          END IF
    1187             : 
    1188          88 :          DO i_loop = 1, n_loop
    1189        1000 :             eigenvalues = 0.0_dp
    1190          80 :             CALL cp_fm_set_all(eigenvectors, 0.0_dp)
    1191             :             CALL cp_fm_to_fm(source=matrix, &
    1192          80 :                              destination=work)
    1193             : 
    1194             :             ! DONE, now testing
    1195          80 :             t1 = m_walltime()
    1196          40 :             SELECT CASE (diag_method)
    1197             :             CASE (do_diag_syevd)
    1198             :                CALL cp_fm_syevd(matrix=work, &
    1199             :                                 eigenvectors=eigenvectors, &
    1200          40 :                                 eigenvalues=eigenvalues)
    1201             :             CASE (do_diag_syevx)
    1202             :                CALL cp_fm_syevx(matrix=work, &
    1203             :                                 eigenvectors=eigenvectors, &
    1204             :                                 eigenvalues=eigenvalues, &
    1205             :                                 neig=neig, &
    1206          80 :                                 work_syevx=1.0_dp)
    1207             :             END SELECT
    1208          80 :             t2 = m_walltime()
    1209          88 :             IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
    1210             :          END DO
    1211             : 
    1212           8 :          IF (iw > 0) THEN
    1213           4 :             WRITE (iw, *) "Eigenvalues: "
    1214           4 :             WRITE (UNIT=iw, FMT="(T3,5F14.6)") eigenvalues(1:neig)
    1215          42 :             WRITE (UNIT=iw, FMT="(T3,A4,F16.6)") "Sum:", SUM(eigenvalues(1:neig))
    1216           4 :             WRITE (iw, *) ""
    1217             :          END IF
    1218             : 
    1219             :          ! Clean up
    1220           8 :          DEALLOCATE (eigenvalues)
    1221           8 :          CALL cp_fm_release(matrix=work)
    1222           8 :          CALL cp_fm_release(matrix=eigenvectors)
    1223           8 :          CALL cp_fm_release(matrix=matrix)
    1224          42 :          CALL cp_fm_struct_release(fmstruct=fmstruct)
    1225             : 
    1226             :       END DO
    1227             : 
    1228           2 :       CALL cp_blacs_env_release(blacs_env=blacs_env)
    1229             : 
    1230           4 :    END SUBROUTINE eigensolver_test
    1231             : 
    1232             : ! **************************************************************************************************
    1233             : !> \brief Tests the parallel matrix multiply
    1234             : !> \param para_env ...
    1235             : !> \param iw ...
    1236             : !> \param cp_fm_gemm_test_section ...
    1237             : ! **************************************************************************************************
    1238           4 :    SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
    1239             : 
    1240             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1241             :       INTEGER                                            :: iw
    1242             :       TYPE(section_vals_type), POINTER                   :: cp_fm_gemm_test_section
    1243             : 
    1244             :       CHARACTER(LEN=1)                                   :: transa, transb
    1245             :       INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, ncol_block_actual, &
    1246             :          ncol_global, np, nrow_block, nrow_block_actual, nrow_global
    1247           4 :       INTEGER, DIMENSION(:), POINTER                     :: grid_2d
    1248             :       LOGICAL                                            :: force_blocksize, row_major, transa_p, &
    1249             :                                                             transb_p
    1250             :       REAL(KIND=dp)                                      :: t1, t2, t3, t4
    1251             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1252             :       TYPE(cp_fm_struct_type), POINTER                   :: fmstruct_a, fmstruct_b, fmstruct_c
    1253             :       TYPE(cp_fm_type)                                   :: matrix_a, matrix_b, matrix_c
    1254             : 
    1255           4 :       CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
    1256          24 :       DO i_rep = 1, n_rep
    1257             : 
    1258             :          ! how often should we do the multiply
    1259          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1260             : 
    1261             :          ! matrices def.
    1262          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
    1263          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
    1264          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
    1265          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1266          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1267          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
    1268          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
    1269          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
    1270          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
    1271          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
    1272          20 :          transa = "N"
    1273          20 :          transb = "N"
    1274          20 :          IF (transa_p) transa = "T"
    1275          20 :          IF (transb_p) transb = "T"
    1276             : 
    1277          20 :          IF (iw > 0) THEN
    1278          10 :             WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
    1279          10 :             WRITE (iw, '(T2,A)', ADVANCE="NO") "C = "
    1280          10 :             IF (transa_p) THEN
    1281           2 :                WRITE (iw, '(A)', ADVANCE="NO") "TRANSPOSE(A) x"
    1282             :             ELSE
    1283           8 :                WRITE (iw, '(A)', ADVANCE="NO") "A x "
    1284             :             END IF
    1285          10 :             IF (transb_p) THEN
    1286           2 :                WRITE (iw, '(A)') "TRANSPOSE(B) "
    1287             :             ELSE
    1288           8 :                WRITE (iw, '(A)') "B "
    1289             :             END IF
    1290          10 :             WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
    1291          10 :             WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
    1292          10 :             WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
    1293          30 :             WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
    1294          10 :             WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
    1295             :             ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
    1296          10 :             np = cp_fm_pilaenv(0, 'D')
    1297          10 :             IF (np > 0) THEN
    1298          10 :                WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', np
    1299             :             END IF
    1300             :          END IF
    1301             : 
    1302          20 :          NULLIFY (blacs_env)
    1303             :          CALL cp_blacs_env_create(blacs_env=blacs_env, &
    1304             :                                   para_env=para_env, &
    1305             :                                   row_major=row_major, &
    1306          20 :                                   grid_2d=grid_2d)
    1307             : 
    1308          20 :          NULLIFY (fmstruct_a)
    1309          20 :          IF (transa_p) THEN
    1310           4 :             nrow_global = m; ncol_global = k
    1311             :          ELSE
    1312          16 :             nrow_global = k; ncol_global = m
    1313             :          END IF
    1314             :          CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
    1315             :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1316          20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1317          20 :          CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1318          30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
    1319          20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1320             : 
    1321          20 :          IF (transb_p) THEN
    1322           4 :             nrow_global = n; ncol_global = m
    1323             :          ELSE
    1324          16 :             nrow_global = m; ncol_global = n
    1325             :          END IF
    1326          20 :          NULLIFY (fmstruct_b)
    1327             :          CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
    1328             :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1329          20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1330          20 :          CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1331          30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
    1332          20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1333             : 
    1334          20 :          NULLIFY (fmstruct_c)
    1335          20 :          nrow_global = k
    1336          20 :          ncol_global = n
    1337             :          CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
    1338             :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1339          20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1340          20 :          CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1341          30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
    1342          20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1343             : 
    1344          20 :          CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
    1345          20 :          CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
    1346          20 :          CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
    1347             : 
    1348          20 :          CALL RANDOM_NUMBER(matrix_a%local_data)
    1349          20 :          CALL RANDOM_NUMBER(matrix_b%local_data)
    1350          20 :          CALL RANDOM_NUMBER(matrix_c%local_data)
    1351             : 
    1352          20 :          IF (iw > 0) CALL m_flush(iw)
    1353             : 
    1354          20 :          t1 = m_walltime()
    1355         932 :          DO i_loop = 1, N_loop
    1356         912 :             t3 = m_walltime()
    1357         912 :             CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
    1358         912 :             t4 = m_walltime()
    1359         932 :             IF (iw > 0) THEN
    1360         456 :                WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
    1361         456 :                CALL m_flush(iw)
    1362             :             END IF
    1363             :          END DO
    1364          20 :          t2 = m_walltime()
    1365             : 
    1366          20 :          IF (iw > 0) THEN
    1367          10 :             WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/N_loop
    1368          10 :             IF (t2 > t1) THEN
    1369          10 :                WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
    1370          20 :                   2*REAL(m, kind=dp)*REAL(n, kind=dp)*REAL(k, kind=dp)*N_loop/MAX(0.001_dp, t2 - t1)/1.0E9_dp/para_env%num_pe
    1371             :             END IF
    1372             :          END IF
    1373             : 
    1374          20 :          CALL cp_fm_release(matrix=matrix_a)
    1375          20 :          CALL cp_fm_release(matrix=matrix_b)
    1376          20 :          CALL cp_fm_release(matrix=matrix_c)
    1377          20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_a)
    1378          20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_b)
    1379          20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_c)
    1380         144 :          CALL cp_blacs_env_release(blacs_env=blacs_env)
    1381             : 
    1382             :       END DO
    1383             : 
    1384           4 :    END SUBROUTINE cp_fm_gemm_test
    1385             : 
    1386             : ! **************************************************************************************************
    1387             : !> \brief Tests the DBCSR interface.
    1388             : !> \param para_env ...
    1389             : !> \param iw ...
    1390             : !> \param input_section ...
    1391             : ! **************************************************************************************************
    1392          32 :    SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
    1393             : 
    1394             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1395             :       INTEGER                                            :: iw
    1396             :       TYPE(section_vals_type), POINTER                   :: input_section
    1397             : 
    1398             :       CHARACTER, DIMENSION(3)                            :: types
    1399             :       INTEGER                                            :: data_type, i_rep, k, m, n, N_loop, &
    1400             :                                                             n_rep, test_type
    1401          32 :       INTEGER, DIMENSION(:), POINTER                     :: bs_k, bs_m, bs_n, nproc
    1402             :       LOGICAL                                            :: always_checksum, retain_sparsity, &
    1403             :                                                             transa_p, transb_p
    1404             :       REAL(KIND=dp)                                      :: alpha, beta, filter_eps, s_a, s_b, s_c
    1405             : 
    1406             : !   ---------------------------------------------------------------------------
    1407             : 
    1408          32 :       NULLIFY (bs_m, bs_n, bs_k)
    1409          32 :       CALL section_vals_get(input_section, n_repetition=n_rep)
    1410          32 :       CALL dbcsr_reset_randmat_seed()
    1411          78 :       DO i_rep = 1, n_rep
    1412             :          ! how often should we do the multiply
    1413          46 :          CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1414             : 
    1415             :          ! matrices def.
    1416          46 :          CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
    1417          46 :          CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
    1418          46 :          CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
    1419          46 :          CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
    1420          46 :          CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
    1421          46 :          CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1422          46 :          CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1423             :          CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
    1424          46 :                                    i_vals=bs_m)
    1425             :          CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
    1426          46 :                                    i_vals=bs_n)
    1427             :          CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
    1428          46 :                                    i_vals=bs_k)
    1429          46 :          CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
    1430          46 :          CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
    1431          46 :          CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
    1432          46 :          CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
    1433          46 :          CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
    1434          46 :          CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
    1435             :          CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
    1436          46 :                                    i_vals=nproc)
    1437             :          CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
    1438          46 :                                    c_val=types(1))
    1439             :          CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
    1440          46 :                                    c_val=types(2))
    1441             :          CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
    1442          46 :                                    c_val=types(3))
    1443             :          CALL section_vals_val_get(input_section, "filter_eps", &
    1444          46 :                                    i_rep_section=i_rep, r_val=filter_eps)
    1445          46 :          CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
    1446             : 
    1447             :          CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
    1448             :                               (/m, n, k/), &
    1449             :                               (/transa_p, transb_p/), &
    1450             :                               bs_m, bs_n, bs_k, &
    1451             :                               (/s_a, s_b, s_c/), &
    1452             :                               alpha, beta, &
    1453             :                               data_type=data_type, &
    1454             :                               test_type=test_type, &
    1455             :                               n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
    1456         538 :                               always_checksum=always_checksum)
    1457             :       END DO
    1458          32 :    END SUBROUTINE cp_dbcsr_tests
    1459             : 
    1460             : ! **************************************************************************************************
    1461             : !> \brief Tests the DBM library.
    1462             : !> \param para_env ...
    1463             : !> \param iw ...
    1464             : !> \param input_section ...
    1465             : ! **************************************************************************************************
    1466          14 :    SUBROUTINE run_dbm_tests(para_env, iw, input_section)
    1467             : 
    1468             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1469             :       INTEGER                                            :: iw
    1470             :       TYPE(section_vals_type), POINTER                   :: input_section
    1471             : 
    1472             :       INTEGER                                            :: i_rep, k, m, n, N_loop, n_rep
    1473          14 :       INTEGER, DIMENSION(:), POINTER                     :: bs_k, bs_m, bs_n
    1474             :       LOGICAL                                            :: always_checksum, retain_sparsity, &
    1475             :                                                             transa_p, transb_p
    1476             :       REAL(KIND=dp)                                      :: alpha, beta, filter_eps, s_a, s_b, s_c
    1477             : 
    1478             : !   ---------------------------------------------------------------------------
    1479             : 
    1480          14 :       NULLIFY (bs_m, bs_n, bs_k)
    1481          14 :       CALL section_vals_get(input_section, n_repetition=n_rep)
    1482          14 :       CALL dbcsr_reset_randmat_seed()
    1483          28 :       DO i_rep = 1, n_rep
    1484          14 :          CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1485          14 :          CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
    1486          14 :          CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
    1487          14 :          CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
    1488          14 :          CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1489          14 :          CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1490          14 :          CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, i_vals=bs_m)
    1491          14 :          CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, i_vals=bs_n)
    1492          14 :          CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, i_vals=bs_k)
    1493          14 :          CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
    1494          14 :          CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
    1495          14 :          CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
    1496          14 :          CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
    1497          14 :          CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
    1498          14 :          CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
    1499          14 :          CALL section_vals_val_get(input_section, "filter_eps", i_rep_section=i_rep, r_val=filter_eps)
    1500          14 :          CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
    1501             : 
    1502             :          CALL dbm_run_tests(mp_group=para_env, &
    1503             :                             io_unit=iw, &
    1504             :                             matrix_sizes=(/m, n, k/), &
    1505             :                             trs=(/transa_p, transb_p/), &
    1506             :                             bs_m=bs_m, &
    1507             :                             bs_n=bs_n, &
    1508             :                             bs_k=bs_k, &
    1509             :                             sparsities=(/s_a, s_b, s_c/), &
    1510             :                             alpha=alpha, &
    1511             :                             beta=beta, &
    1512             :                             n_loops=n_loop, &
    1513             :                             eps=filter_eps, &
    1514             :                             retain_sparsity=retain_sparsity, &
    1515         154 :                             always_checksum=always_checksum)
    1516             :       END DO
    1517          14 :    END SUBROUTINE run_dbm_tests
    1518             : 
    1519        8484 : END MODULE library_tests

Generated by: LCOV version 1.15