LCOV - code coverage report
Current view: top level - src/common - timings_report.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 142 161 88.2 %
Date: 2024-11-22 07:00:40 Functions: 4 4 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Timing routines for accounting
      10             : !> \par History
      11             : !>      02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
      12             : !>      11.2004 storable timer_envs (for f77 interface) [fawzi]
      13             : !>      10.2005 binary search to speed up lookup in timeset [fawzi]
      14             : !>      12.2012 Complete rewrite based on dictionaries. [ole]
      15             : !>      01.2014 Collect statistics from all MPI ranks. [ole]
      16             : !> \author JGH
      17             : ! **************************************************************************************************
      18             : MODULE timings_report
      19             :    USE callgraph,                       ONLY: callgraph_item_type,&
      20             :                                               callgraph_items
      21             :    USE cp_files,                        ONLY: close_file,&
      22             :                                               open_file
      23             :    USE kinds,                           ONLY: default_string_length,&
      24             :                                               dp,&
      25             :                                               int_8
      26             :    USE list,                            ONLY: list_destroy,&
      27             :                                               list_get,&
      28             :                                               list_init,&
      29             :                                               list_isready,&
      30             :                                               list_pop,&
      31             :                                               list_push,&
      32             :                                               list_size
      33             :    USE list_routinereport,              ONLY: list_routinereport_type
      34             :    USE message_passing,                 ONLY: mp_para_env_type
      35             :    USE routine_map,                     ONLY: routine_map_get,&
      36             :                                               routine_map_haskey
      37             :    USE timings,                         ONLY: get_timer_env
      38             :    USE timings_base_type,               ONLY: call_stat_type,&
      39             :                                               routine_report_type,&
      40             :                                               routine_stat_type
      41             :    USE timings_types,                   ONLY: timer_env_type
      42             :    USE util,                            ONLY: sort
      43             : #include "../base/base_uses.f90"
      44             : 
      45             :    IMPLICIT NONE
      46             :    PRIVATE
      47             : 
      48             :    INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
      49             : 
      50             :    PUBLIC :: timings_report_print, timings_report_callgraph
      51             : 
      52             : CONTAINS
      53             : 
      54             : ! **************************************************************************************************
      55             : !> \brief Print accumulated information on timers
      56             : !> \param iw ...
      57             : !> \param r_timings ...
      58             : !> \param sort_by_self_time ...
      59             : !> \param cost_type ...
      60             : !> \param report_maxloc ...
      61             : !> \param para_env is needed to collect statistics from other nodes.
      62             : !> \par History
      63             : !>      none
      64             : !> \author JGH
      65             : ! **************************************************************************************************
      66        9127 :    SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
      67             :       INTEGER, INTENT(IN)                                :: iw
      68             :       REAL(KIND=dp), INTENT(IN)                          :: r_timings
      69             :       LOGICAL, INTENT(IN)                                :: sort_by_self_time
      70             :       INTEGER, INTENT(IN)                                :: cost_type
      71             :       LOGICAL, INTENT(IN)                                :: report_maxloc
      72             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
      73             : 
      74             :       TYPE(list_routinereport_type)                      :: reports
      75             :       TYPE(routine_report_type), POINTER                 :: r_report
      76             : 
      77        9127 :       CALL list_init(reports)
      78        9127 :       CALL collect_reports_from_ranks(reports, cost_type, para_env)
      79             : 
      80        9127 :       IF (list_size(reports) > 0 .AND. iw > 0) &
      81        4667 :          CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
      82             : 
      83             :       ! deallocate reports
      84     3669691 :       DO WHILE (list_size(reports) > 0)
      85     3660564 :          r_report => list_pop(reports)
      86     3660564 :          DEALLOCATE (r_report)
      87             :       END DO
      88        9127 :       CALL list_destroy(reports)
      89             : 
      90        9127 :    END SUBROUTINE timings_report_print
      91             : 
      92             : ! **************************************************************************************************
      93             : !> \brief Collects the timing or energy reports from all MPI ranks.
      94             : !> \param reports ...
      95             : !> \param cost_type ...
      96             : !> \param para_env ...
      97             : !> \author Ole Schuett
      98             : ! **************************************************************************************************
      99        9127 :    SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
     100             :       TYPE(list_routinereport_type), INTENT(INOUT)       :: reports
     101             :       INTEGER, INTENT(IN)                                :: cost_type
     102             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
     103             : 
     104             :       CHARACTER(LEN=default_string_length)               :: routineN
     105             :       INTEGER                                            :: local_routine_id, sending_rank
     106        9127 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: collected
     107             :       REAL(KIND=dp)                                      :: foobar
     108             :       REAL(KIND=dp), DIMENSION(2)                        :: dbuf
     109             :       TYPE(routine_report_type), POINTER                 :: r_report
     110             :       TYPE(routine_stat_type), POINTER                   :: r_stat
     111             :       TYPE(timer_env_type), POINTER                      :: timer_env
     112             : 
     113        9127 :       NULLIFY (r_stat, r_report, timer_env)
     114        9127 :       IF (.NOT. list_isready(reports)) &
     115           0 :          CPABORT("BUG")
     116             : 
     117        9127 :       timer_env => get_timer_env()
     118             : 
     119             :       ! make sure all functions have been called so that list_size(timer_env%routine_stats)
     120             :       ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
     121             :       ! this hack makes sure they are called before
     122        9127 :       routineN = ""
     123        9127 :       CALL para_env%bcast(routineN, 0)
     124        9127 :       sending_rank = 0
     125        9127 :       CALL para_env%max(sending_rank)
     126        9127 :       CALL para_env%sum(sending_rank)
     127        9127 :       foobar = 0.0_dp
     128        9127 :       CALL para_env%max(foobar)
     129        9127 :       dbuf = 0.0_dp
     130        9127 :       CALL para_env%maxloc(dbuf)
     131        9127 :       CALL para_env%sum(foobar)
     132             :       ! end hack
     133             : 
     134             :       ! Array collected is used as a bit field.
     135             :       ! It's of type integer in order to use the convenient MINLOC routine.
     136       27381 :       ALLOCATE (collected(list_size(timer_env%routine_stats)))
     137     3656592 :       collected(:) = 0
     138             : 
     139     3660564 :       DO
     140             :          ! does any rank have uncollected stats?
     141     3669691 :          sending_rank = -1
     142   751256412 :          IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
     143     3669691 :          CALL para_env%max(sending_rank)
     144     3669691 :          IF (sending_rank < 0) EXIT ! every rank got all routines collected
     145     3660564 :          IF (sending_rank == para_env%mepos) THEN
     146   899614472 :             local_routine_id = MINLOC(collected, dim=1)
     147     1853241 :             r_stat => list_get(timer_env%routine_stats, local_routine_id)
     148     1853241 :             routineN = r_stat%routineN
     149             :          END IF
     150     3660564 :          CALL para_env%bcast(routineN, sending_rank)
     151             : 
     152             :          ! Create new report for routineN
     153     3660564 :          ALLOCATE (r_report)
     154     3660564 :          CALL list_push(reports, r_report)
     155     3660564 :          r_report%routineN = routineN
     156             : 
     157             :          ! If routineN was called on local node, add local stats
     158     3660564 :          IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
     159     3647465 :             local_routine_id = routine_map_get(timer_env%routine_names, routineN)
     160     3647465 :             collected(local_routine_id) = 1
     161     3647465 :             r_stat => list_get(timer_env%routine_stats, local_routine_id)
     162     3647465 :             r_report%max_total_calls = r_stat%total_calls
     163     3647465 :             r_report%sum_total_calls = r_stat%total_calls
     164     3647465 :             r_report%sum_stackdepth = r_stat%stackdepth_accu
     165     3647465 :             SELECT CASE (cost_type)
     166             :             CASE (cost_type_energy)
     167           0 :                r_report%max_icost = r_stat%incl_energy_accu
     168           0 :                r_report%sum_icost = r_stat%incl_energy_accu
     169           0 :                r_report%max_ecost = r_stat%excl_energy_accu
     170           0 :                r_report%sum_ecost = r_stat%excl_energy_accu
     171             :             CASE (cost_type_time)
     172     3647465 :                r_report%max_icost = r_stat%incl_walltime_accu
     173     3647465 :                r_report%sum_icost = r_stat%incl_walltime_accu
     174     3647465 :                r_report%max_ecost = r_stat%excl_walltime_accu
     175     3647465 :                r_report%sum_ecost = r_stat%excl_walltime_accu
     176             :             CASE DEFAULT
     177     3647465 :                CPABORT("BUG")
     178             :             END SELECT
     179             :          END IF
     180             : 
     181             :          ! collect stats of routineN via MPI
     182     3660564 :          CALL para_env%max(r_report%max_total_calls)
     183     3660564 :          CALL para_env%sum(r_report%sum_total_calls)
     184     3660564 :          CALL para_env%sum(r_report%sum_stackdepth)
     185             : 
     186             :          ! get value and rank of the maximum inclusive cost
     187    10981692 :          dbuf = (/r_report%max_icost, REAL(para_env%mepos, KIND=dp)/)
     188     3660564 :          CALL para_env%maxloc(dbuf)
     189     3660564 :          r_report%max_icost = dbuf(1)
     190     3660564 :          r_report%max_irank = INT(dbuf(2))
     191             : 
     192     3660564 :          CALL para_env%sum(r_report%sum_icost)
     193             : 
     194             :          ! get value and rank of the maximum exclusive cost
     195    10981692 :          dbuf = (/r_report%max_ecost, REAL(para_env%mepos, KIND=dp)/)
     196     3660564 :          CALL para_env%maxloc(dbuf)
     197     3660564 :          r_report%max_ecost = dbuf(1)
     198     3660564 :          r_report%max_erank = INT(dbuf(2))
     199             : 
     200     3669691 :          CALL para_env%sum(r_report%sum_ecost)
     201             :       END DO
     202             : 
     203        9127 :    END SUBROUTINE collect_reports_from_ranks
     204             : 
     205             : ! **************************************************************************************************
     206             : !> \brief Print the collected reports
     207             : !> \param reports ...
     208             : !> \param iw ...
     209             : !> \param threshold ...
     210             : !> \param sort_by_exclusiv_cost ...
     211             : !> \param cost_type ...
     212             : !> \param report_maxloc ...
     213             : !> \param para_env ...
     214             : !> \par History
     215             : !>      01.2014 Refactored (Ole Schuett)
     216             : !> \author JGH
     217             : ! **************************************************************************************************
     218        4667 :    SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
     219             :       TYPE(list_routinereport_type), INTENT(IN)          :: reports
     220             :       INTEGER, INTENT(IN)                                :: iw
     221             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     222             :       LOGICAL, INTENT(IN)                                :: sort_by_exclusiv_cost
     223             :       INTEGER, INTENT(IN)                                :: cost_type
     224             :       LOGICAL, INTENT(IN)                                :: report_maxloc
     225             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
     226             : 
     227             :       CHARACTER(LEN=4)                                   :: label
     228             :       CHARACTER(LEN=default_string_length)               :: fmt, title
     229             :       INTEGER                                            :: decimals, i, j, num_routines
     230        4667 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indices
     231             :       REAL(KIND=dp)                                      :: asd, maxcost, mincost
     232        4667 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: max_costs
     233             :       TYPE(routine_report_type), POINTER                 :: r_report_i, r_report_j
     234             : 
     235        4667 :       NULLIFY (r_report_i, r_report_j)
     236        4667 :       IF (.NOT. list_isready(reports)) &
     237           0 :          CPABORT("BUG")
     238             : 
     239             :       ! are we printing timing or energy ?
     240        4667 :       SELECT CASE (cost_type)
     241             :       CASE (cost_type_energy)
     242           0 :          title = "E N E R G Y"
     243           0 :          label = "ENER"
     244             :       CASE (cost_type_time)
     245        4667 :          title = "T I M I N G"
     246        4667 :          label = "TIME"
     247             :       CASE DEFAULT
     248        4667 :          CPABORT("BUG")
     249             :       END SELECT
     250             : 
     251             :       ! write banner
     252        4667 :       WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
     253        4667 :       WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
     254        4667 :       WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
     255        4667 :       WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
     256        4667 :       WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
     257        4667 :       IF (report_maxloc) THEN
     258             :          WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
     259           0 :             "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
     260             :       ELSE
     261             :          WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
     262        4667 :             "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
     263             :       END IF
     264             : 
     265             :       WRITE (UNIT=iw, FMT="(T33,A)") &
     266        4667 :          "MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM"
     267             : 
     268             :       ! sort statistics
     269        4667 :       num_routines = list_size(reports)
     270       14001 :       ALLOCATE (max_costs(num_routines))
     271     1857908 :       DO i = 1, num_routines
     272     1853241 :          r_report_i => list_get(reports, i)
     273     1857908 :          IF (sort_by_exclusiv_cost) THEN
     274        1436 :             max_costs(i) = r_report_i%max_ecost
     275             :          ELSE
     276     1851805 :             max_costs(i) = r_report_i%max_icost
     277             :          END IF
     278             :       END DO
     279       14001 :       ALLOCATE (indices(num_routines))
     280        4667 :       CALL sort(max_costs, num_routines, indices)
     281             : 
     282     1862575 :       maxcost = MAXVAL(max_costs)
     283        4667 :       mincost = maxcost*threshold
     284             : 
     285             :       ! adjust fmt dynamically based on the max walltime.
     286             :       ! few clocks have more than 3 digits resolution, so stop there
     287        4667 :       decimals = 3
     288        4667 :       IF (maxcost >= 10000) decimals = 2
     289           0 :       IF (maxcost >= 100000) decimals = 1
     290        4667 :       IF (maxcost >= 1000000) decimals = 0
     291        4667 :       IF (report_maxloc) THEN
     292             :          WRITE (UNIT=fmt, FMT="(A,I0,A)") &
     293           0 :             "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
     294             :       ELSE
     295             :          WRITE (UNIT=fmt, FMT="(A,I0,A)") &
     296        4667 :             "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
     297             :       END IF
     298             : 
     299             :       !write output
     300     1857908 :       DO i = num_routines, 1, -1
     301     1857908 :          IF (max_costs(i) >= mincost) THEN
     302      257594 :             j = indices(i)
     303      257594 :             r_report_j => list_get(reports, j)
     304             :             ! average stack depth
     305             :             asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
     306      257594 :                   REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
     307      257594 :             IF (report_maxloc) THEN
     308             :                WRITE (UNIT=iw, FMT=fmt) &
     309           0 :                   ADJUSTL(r_report_j%routineN(1:31)), &
     310           0 :                   r_report_j%max_total_calls, &
     311           0 :                   asd, &
     312           0 :                   r_report_j%sum_ecost/para_env%num_pe, &
     313           0 :                   r_report_j%max_ecost, &
     314           0 :                   r_report_j%sum_icost/para_env%num_pe, &
     315           0 :                   r_report_j%max_icost, &
     316           0 :                   r_report_j%max_erank
     317             :             ELSE
     318             :                WRITE (UNIT=iw, FMT=fmt) &
     319      257594 :                   ADJUSTL(r_report_j%routineN(1:31)), &
     320      257594 :                   r_report_j%max_total_calls, &
     321      257594 :                   asd, &
     322      257594 :                   r_report_j%sum_ecost/para_env%num_pe, &
     323      257594 :                   r_report_j%max_ecost, &
     324      257594 :                   r_report_j%sum_icost/para_env%num_pe, &
     325      515188 :                   r_report_j%max_icost
     326             :             END IF
     327             :          END IF
     328             :       END DO
     329        4667 :       WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
     330             : 
     331        4667 :    END SUBROUTINE print_reports
     332             : 
     333             : ! **************************************************************************************************
     334             : !> \brief Write accumulated callgraph information as cachegrind-file.
     335             : !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
     336             : !> \param filename ...
     337             : !> \par History
     338             : !>     12.2012  initial version[ole]
     339             : !> \author Ole Schuett
     340             : ! **************************************************************************************************
     341           1 :    SUBROUTINE timings_report_callgraph(filename)
     342             :       CHARACTER(len=*), INTENT(in)                       :: filename
     343             : 
     344             :       INTEGER, PARAMETER                                 :: E = 1000, T = 100000
     345             : 
     346             :       INTEGER                                            :: i, unit
     347             :       TYPE(call_stat_type), POINTER                      :: c_stat
     348           1 :       TYPE(callgraph_item_type), DIMENSION(:), POINTER   :: ct_items
     349             :       TYPE(routine_stat_type), POINTER                   :: r_stat
     350             :       TYPE(timer_env_type), POINTER                      :: timer_env
     351             : 
     352             :       CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
     353           1 :                      file_form="FORMATTED", unit_number=unit)
     354           1 :       timer_env => get_timer_env()
     355             : 
     356             :       ! use outermost routine as total runtime
     357           1 :       r_stat => list_get(timer_env%routine_stats, 1)
     358           1 :       WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
     359           1 :       WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
     360           1 :          INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
     361           2 :          INT(E*r_stat%incl_energy_accu, KIND=int_8)
     362             : 
     363         454 :       DO i = 1, list_size(timer_env%routine_stats)
     364         453 :          r_stat => list_get(timer_env%routine_stats, i)
     365         453 :          WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
     366         453 :          WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
     367         453 :             INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
     368         907 :             INT(E*r_stat%excl_energy_accu, KIND=int_8)
     369             :       END DO
     370             : 
     371           1 :       ct_items => callgraph_items(timer_env%callgraph)
     372         780 :       DO i = 1, SIZE(ct_items)
     373         779 :          c_stat => ct_items(i)%value
     374         779 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
     375         779 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
     376         779 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
     377         779 :          WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
     378         779 :             INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
     379        1559 :             INT(E*c_stat%incl_energy_accu, KIND=int_8)
     380             :       END DO
     381           1 :       DEALLOCATE (ct_items)
     382             : 
     383           1 :       CALL close_file(unit_number=unit, file_status="KEEP")
     384             : 
     385           1 :    END SUBROUTINE timings_report_callgraph
     386             : END MODULE timings_report
     387             : 

Generated by: LCOV version 1.15