Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Timing routines for accounting
10 : !> \par History
11 : !> 02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
12 : !> 11.2004 storable timer_envs (for f77 interface) [fawzi]
13 : !> 10.2005 binary search to speed up lookup in timeset [fawzi]
14 : !> 12.2012 Complete rewrite based on dictionaries. [ole]
15 : !> 01.2014 Collect statistics from all MPI ranks. [ole]
16 : !> \author JGH
17 : ! **************************************************************************************************
18 : MODULE timings_report
19 : USE callgraph, ONLY: callgraph_item_type,&
20 : callgraph_items
21 : USE cp_files, ONLY: close_file,&
22 : open_file
23 : USE kinds, ONLY: default_string_length,&
24 : dp,&
25 : int_8
26 : USE list, ONLY: list_destroy,&
27 : list_get,&
28 : list_init,&
29 : list_isready,&
30 : list_pop,&
31 : list_push,&
32 : list_size
33 : USE list_routinereport, ONLY: list_routinereport_type
34 : USE message_passing, ONLY: mp_para_env_type
35 : USE routine_map, ONLY: routine_map_get,&
36 : routine_map_haskey
37 : USE timings, ONLY: get_timer_env
38 : USE timings_base_type, ONLY: call_stat_type,&
39 : routine_report_type,&
40 : routine_stat_type
41 : USE timings_types, ONLY: timer_env_type
42 : USE util, ONLY: sort
43 : #include "../base/base_uses.f90"
44 :
45 : IMPLICIT NONE
46 : PRIVATE
47 :
48 : INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
49 :
50 : PUBLIC :: timings_report_print, timings_report_callgraph
51 :
52 : CONTAINS
53 :
54 : ! **************************************************************************************************
55 : !> \brief Print accumulated information on timers
56 : !> \param iw ...
57 : !> \param r_timings ...
58 : !> \param sort_by_self_time ...
59 : !> \param cost_type ...
60 : !> \param report_maxloc ...
61 : !> \param para_env is needed to collect statistics from other nodes.
62 : !> \par History
63 : !> none
64 : !> \author JGH
65 : ! **************************************************************************************************
66 9801 : SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
67 : INTEGER, INTENT(IN) :: iw
68 : REAL(KIND=dp), INTENT(IN) :: r_timings
69 : LOGICAL, INTENT(IN) :: sort_by_self_time
70 : INTEGER, INTENT(IN) :: cost_type
71 : LOGICAL, INTENT(IN) :: report_maxloc
72 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
73 :
74 : TYPE(list_routinereport_type) :: reports
75 : TYPE(routine_report_type), POINTER :: r_report
76 :
77 9801 : CALL list_init(reports)
78 9801 : CALL collect_reports_from_ranks(reports, cost_type, para_env)
79 :
80 9801 : IF (list_size(reports) > 0 .AND. iw > 0) &
81 5004 : CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
82 :
83 : ! deallocate reports
84 3917245 : DO WHILE (list_size(reports) > 0)
85 3907444 : r_report => list_pop(reports)
86 3907444 : DEALLOCATE (r_report)
87 : END DO
88 9801 : CALL list_destroy(reports)
89 :
90 9801 : END SUBROUTINE timings_report_print
91 :
92 : ! **************************************************************************************************
93 : !> \brief Collects the timing or energy reports from all MPI ranks.
94 : !> \param reports ...
95 : !> \param cost_type ...
96 : !> \param para_env ...
97 : !> \author Ole Schuett
98 : ! **************************************************************************************************
99 9801 : SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
100 : TYPE(list_routinereport_type), INTENT(INOUT) :: reports
101 : INTEGER, INTENT(IN) :: cost_type
102 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
103 :
104 : CHARACTER(LEN=default_string_length) :: routineN
105 : INTEGER :: local_routine_id, sending_rank
106 9801 : INTEGER, ALLOCATABLE, DIMENSION(:) :: collected
107 : REAL(KIND=dp) :: foobar
108 : REAL(KIND=dp), DIMENSION(2) :: dbuf
109 : TYPE(routine_report_type), POINTER :: r_report
110 : TYPE(routine_stat_type), POINTER :: r_stat
111 : TYPE(timer_env_type), POINTER :: timer_env
112 :
113 9801 : NULLIFY (r_stat, r_report, timer_env)
114 9801 : IF (.NOT. list_isready(reports)) &
115 0 : CPABORT("BUG")
116 :
117 9801 : timer_env => get_timer_env()
118 :
119 : ! make sure all functions have been called so that list_size(timer_env%routine_stats)
120 : ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
121 : ! this hack makes sure they are called before
122 9801 : routineN = ""
123 9801 : CALL para_env%bcast(routineN, 0)
124 9801 : sending_rank = 0
125 9801 : CALL para_env%max(sending_rank)
126 9801 : CALL para_env%sum(sending_rank)
127 9801 : foobar = 0.0_dp
128 9801 : CALL para_env%max(foobar)
129 9801 : dbuf = 0.0_dp
130 9801 : CALL para_env%maxloc(dbuf)
131 9801 : CALL para_env%sum(foobar)
132 : ! end hack
133 :
134 : ! Array collected is used as a bit field.
135 : ! It's of type integer in order to use the convenient MINLOC routine.
136 29403 : ALLOCATE (collected(list_size(timer_env%routine_stats)))
137 3902831 : collected(:) = 0
138 :
139 3907444 : DO
140 : ! does any rank have uncollected stats?
141 3917245 : sending_rank = -1
142 796459693 : IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
143 3917245 : CALL para_env%max(sending_rank)
144 3917245 : IF (sending_rank < 0) EXIT ! every rank got all routines collected
145 3907444 : IF (sending_rank == para_env%mepos) THEN
146 945569750 : local_routine_id = MINLOC(collected, dim=1)
147 1976675 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
148 1976675 : routineN = r_stat%routineN
149 : END IF
150 3907444 : CALL para_env%bcast(routineN, sending_rank)
151 :
152 : ! Create new report for routineN
153 3907444 : ALLOCATE (r_report)
154 3907444 : CALL list_push(reports, r_report)
155 3907444 : r_report%routineN = routineN
156 :
157 : ! If routineN was called on local node, add local stats
158 3907444 : IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
159 3893030 : local_routine_id = routine_map_get(timer_env%routine_names, routineN)
160 3893030 : collected(local_routine_id) = 1
161 3893030 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
162 3893030 : r_report%max_total_calls = r_stat%total_calls
163 3893030 : r_report%sum_total_calls = r_stat%total_calls
164 3893030 : r_report%sum_stackdepth = r_stat%stackdepth_accu
165 3893030 : SELECT CASE (cost_type)
166 : CASE (cost_type_energy)
167 0 : r_report%max_icost = r_stat%incl_energy_accu
168 0 : r_report%sum_icost = r_stat%incl_energy_accu
169 0 : r_report%max_ecost = r_stat%excl_energy_accu
170 0 : r_report%sum_ecost = r_stat%excl_energy_accu
171 : CASE (cost_type_time)
172 3893030 : r_report%max_icost = r_stat%incl_walltime_accu
173 3893030 : r_report%sum_icost = r_stat%incl_walltime_accu
174 3893030 : r_report%max_ecost = r_stat%excl_walltime_accu
175 3893030 : r_report%sum_ecost = r_stat%excl_walltime_accu
176 : CASE DEFAULT
177 3893030 : CPABORT("BUG")
178 : END SELECT
179 : END IF
180 :
181 : ! collect stats of routineN via MPI
182 3907444 : CALL para_env%max(r_report%max_total_calls)
183 3907444 : CALL para_env%sum(r_report%sum_total_calls)
184 3907444 : CALL para_env%sum(r_report%sum_stackdepth)
185 :
186 : ! get value and rank of the maximum inclusive cost
187 11722332 : dbuf = (/r_report%max_icost, REAL(para_env%mepos, KIND=dp)/)
188 3907444 : CALL para_env%maxloc(dbuf)
189 3907444 : r_report%max_icost = dbuf(1)
190 3907444 : r_report%max_irank = INT(dbuf(2))
191 :
192 3907444 : CALL para_env%sum(r_report%sum_icost)
193 :
194 : ! get value and rank of the maximum exclusive cost
195 11722332 : dbuf = (/r_report%max_ecost, REAL(para_env%mepos, KIND=dp)/)
196 3907444 : CALL para_env%maxloc(dbuf)
197 3907444 : r_report%max_ecost = dbuf(1)
198 3907444 : r_report%max_erank = INT(dbuf(2))
199 :
200 3917245 : CALL para_env%sum(r_report%sum_ecost)
201 : END DO
202 :
203 9801 : END SUBROUTINE collect_reports_from_ranks
204 :
205 : ! **************************************************************************************************
206 : !> \brief Print the collected reports
207 : !> \param reports ...
208 : !> \param iw ...
209 : !> \param threshold ...
210 : !> \param sort_by_exclusiv_cost ...
211 : !> \param cost_type ...
212 : !> \param report_maxloc ...
213 : !> \param para_env ...
214 : !> \par History
215 : !> 01.2014 Refactored (Ole Schuett)
216 : !> \author JGH
217 : ! **************************************************************************************************
218 5004 : SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
219 : TYPE(list_routinereport_type), INTENT(IN) :: reports
220 : INTEGER, INTENT(IN) :: iw
221 : REAL(KIND=dp), INTENT(IN) :: threshold
222 : LOGICAL, INTENT(IN) :: sort_by_exclusiv_cost
223 : INTEGER, INTENT(IN) :: cost_type
224 : LOGICAL, INTENT(IN) :: report_maxloc
225 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
226 :
227 : CHARACTER(LEN=4) :: label
228 : CHARACTER(LEN=default_string_length) :: fmt, title
229 : INTEGER :: decimals, i, j, num_routines
230 5004 : INTEGER, ALLOCATABLE, DIMENSION(:) :: indices
231 : REAL(KIND=dp) :: asd, maxcost, mincost
232 5004 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: max_costs
233 : TYPE(routine_report_type), POINTER :: r_report_i, r_report_j
234 :
235 5004 : NULLIFY (r_report_i, r_report_j)
236 5004 : IF (.NOT. list_isready(reports)) &
237 0 : CPABORT("BUG")
238 :
239 : ! are we printing timing or energy ?
240 5004 : SELECT CASE (cost_type)
241 : CASE (cost_type_energy)
242 0 : title = "E N E R G Y"
243 0 : label = "ENER"
244 : CASE (cost_type_time)
245 5004 : title = "T I M I N G"
246 5004 : label = "TIME"
247 : CASE DEFAULT
248 5004 : CPABORT("BUG")
249 : END SELECT
250 :
251 : ! write banner
252 5004 : WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
253 5004 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
254 5004 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
255 5004 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
256 5004 : WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
257 5004 : IF (report_maxloc) THEN
258 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
259 0 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
260 : ELSE
261 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
262 5004 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
263 : END IF
264 :
265 : WRITE (UNIT=iw, FMT="(T33,A)") &
266 5004 : "MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM"
267 :
268 : ! sort statistics
269 5004 : num_routines = list_size(reports)
270 15012 : ALLOCATE (max_costs(num_routines))
271 1981679 : DO i = 1, num_routines
272 1976675 : r_report_i => list_get(reports, i)
273 1981679 : IF (sort_by_exclusiv_cost) THEN
274 1442 : max_costs(i) = r_report_i%max_ecost
275 : ELSE
276 1975233 : max_costs(i) = r_report_i%max_icost
277 : END IF
278 : END DO
279 15012 : ALLOCATE (indices(num_routines))
280 5004 : CALL sort(max_costs, num_routines, indices)
281 :
282 1986683 : maxcost = MAXVAL(max_costs)
283 5004 : mincost = maxcost*threshold
284 :
285 : ! adjust fmt dynamically based on the max walltime.
286 : ! few clocks have more than 3 digits resolution, so stop there
287 5004 : decimals = 3
288 5004 : IF (maxcost >= 10000) decimals = 2
289 0 : IF (maxcost >= 100000) decimals = 1
290 5004 : IF (maxcost >= 1000000) decimals = 0
291 5004 : IF (report_maxloc) THEN
292 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
293 0 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
294 : ELSE
295 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
296 5004 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
297 : END IF
298 :
299 : !write output
300 1981679 : DO i = num_routines, 1, -1
301 1981679 : IF (max_costs(i) >= mincost) THEN
302 267224 : j = indices(i)
303 267224 : r_report_j => list_get(reports, j)
304 : ! average stack depth
305 : asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
306 267224 : REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
307 267224 : IF (report_maxloc) THEN
308 : WRITE (UNIT=iw, FMT=fmt) &
309 0 : ADJUSTL(r_report_j%routineN(1:31)), &
310 0 : r_report_j%max_total_calls, &
311 0 : asd, &
312 0 : r_report_j%sum_ecost/para_env%num_pe, &
313 0 : r_report_j%max_ecost, &
314 0 : r_report_j%sum_icost/para_env%num_pe, &
315 0 : r_report_j%max_icost, &
316 0 : r_report_j%max_erank
317 : ELSE
318 : WRITE (UNIT=iw, FMT=fmt) &
319 267224 : ADJUSTL(r_report_j%routineN(1:31)), &
320 267224 : r_report_j%max_total_calls, &
321 267224 : asd, &
322 267224 : r_report_j%sum_ecost/para_env%num_pe, &
323 267224 : r_report_j%max_ecost, &
324 267224 : r_report_j%sum_icost/para_env%num_pe, &
325 534448 : r_report_j%max_icost
326 : END IF
327 : END IF
328 : END DO
329 5004 : WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
330 :
331 5004 : END SUBROUTINE print_reports
332 :
333 : ! **************************************************************************************************
334 : !> \brief Write accumulated callgraph information as cachegrind-file.
335 : !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
336 : !> \param filename ...
337 : !> \par History
338 : !> 12.2012 initial version[ole]
339 : !> \author Ole Schuett
340 : ! **************************************************************************************************
341 1 : SUBROUTINE timings_report_callgraph(filename)
342 : CHARACTER(len=*), INTENT(in) :: filename
343 :
344 : INTEGER, PARAMETER :: E = 1000, T = 100000
345 :
346 : INTEGER :: i, unit
347 : TYPE(call_stat_type), POINTER :: c_stat
348 1 : TYPE(callgraph_item_type), DIMENSION(:), POINTER :: ct_items
349 : TYPE(routine_stat_type), POINTER :: r_stat
350 : TYPE(timer_env_type), POINTER :: timer_env
351 :
352 : CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
353 1 : file_form="FORMATTED", unit_number=unit)
354 1 : timer_env => get_timer_env()
355 :
356 : ! use outermost routine as total runtime
357 1 : r_stat => list_get(timer_env%routine_stats, 1)
358 1 : WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
359 1 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
360 1 : INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
361 2 : INT(E*r_stat%incl_energy_accu, KIND=int_8)
362 :
363 453 : DO i = 1, list_size(timer_env%routine_stats)
364 452 : r_stat => list_get(timer_env%routine_stats, i)
365 452 : WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
366 452 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
367 452 : INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
368 905 : INT(E*r_stat%excl_energy_accu, KIND=int_8)
369 : END DO
370 :
371 1 : ct_items => callgraph_items(timer_env%callgraph)
372 779 : DO i = 1, SIZE(ct_items)
373 778 : c_stat => ct_items(i)%value
374 778 : WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
375 778 : WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
376 778 : WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
377 778 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
378 778 : INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
379 1557 : INT(E*c_stat%incl_energy_accu, KIND=int_8)
380 : END DO
381 1 : DEALLOCATE (ct_items)
382 :
383 1 : CALL close_file(unit_number=unit, file_status="KEEP")
384 :
385 1 : END SUBROUTINE timings_report_callgraph
386 : END MODULE timings_report
387 :
|