Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Timing routines for accounting
10 : !> \par History
11 : !> 02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
12 : !> 11.2004 storable timer_envs (for f77 interface) [fawzi]
13 : !> 10.2005 binary search to speed up lookup in timeset [fawzi]
14 : !> 12.2012 Complete rewrite based on dictionaries. [ole]
15 : !> 01.2014 Collect statistics from all MPI ranks. [ole]
16 : !> \author JGH
17 : ! **************************************************************************************************
18 : MODULE timings_report
19 : USE callgraph, ONLY: callgraph_item_type,&
20 : callgraph_items
21 : USE cp_files, ONLY: close_file,&
22 : open_file
23 : USE kinds, ONLY: default_string_length,&
24 : dp,&
25 : int_8
26 : USE list, ONLY: list_destroy,&
27 : list_get,&
28 : list_init,&
29 : list_isready,&
30 : list_pop,&
31 : list_push,&
32 : list_size
33 : USE list_routinereport, ONLY: list_routinereport_type
34 : USE message_passing, ONLY: mp_para_env_type
35 : USE routine_map, ONLY: routine_map_get,&
36 : routine_map_haskey
37 : USE timings, ONLY: get_timer_env
38 : USE timings_base_type, ONLY: call_stat_type,&
39 : routine_report_type,&
40 : routine_stat_type
41 : USE timings_types, ONLY: timer_env_type
42 : USE util, ONLY: sort
43 : #include "../base/base_uses.f90"
44 :
45 : IMPLICIT NONE
46 : PRIVATE
47 :
48 : INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
49 :
50 : PUBLIC :: timings_report_print, timings_report_callgraph
51 :
52 : CONTAINS
53 :
54 : ! **************************************************************************************************
55 : !> \brief Print accumulated information on timers
56 : !> \param iw ...
57 : !> \param r_timings ...
58 : !> \param sort_by_self_time ...
59 : !> \param cost_type ...
60 : !> \param report_maxloc ...
61 : !> \param para_env is needed to collect statistics from other nodes.
62 : !> \par History
63 : !> none
64 : !> \author JGH
65 : ! **************************************************************************************************
66 9127 : SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
67 : INTEGER, INTENT(IN) :: iw
68 : REAL(KIND=dp), INTENT(IN) :: r_timings
69 : LOGICAL, INTENT(IN) :: sort_by_self_time
70 : INTEGER, INTENT(IN) :: cost_type
71 : LOGICAL, INTENT(IN) :: report_maxloc
72 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
73 :
74 : TYPE(list_routinereport_type) :: reports
75 : TYPE(routine_report_type), POINTER :: r_report
76 :
77 9127 : CALL list_init(reports)
78 9127 : CALL collect_reports_from_ranks(reports, cost_type, para_env)
79 :
80 9127 : IF (list_size(reports) > 0 .AND. iw > 0) &
81 4667 : CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
82 :
83 : ! deallocate reports
84 3669691 : DO WHILE (list_size(reports) > 0)
85 3660564 : r_report => list_pop(reports)
86 3660564 : DEALLOCATE (r_report)
87 : END DO
88 9127 : CALL list_destroy(reports)
89 :
90 9127 : END SUBROUTINE timings_report_print
91 :
92 : ! **************************************************************************************************
93 : !> \brief Collects the timing or energy reports from all MPI ranks.
94 : !> \param reports ...
95 : !> \param cost_type ...
96 : !> \param para_env ...
97 : !> \author Ole Schuett
98 : ! **************************************************************************************************
99 9127 : SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
100 : TYPE(list_routinereport_type), INTENT(INOUT) :: reports
101 : INTEGER, INTENT(IN) :: cost_type
102 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
103 :
104 : CHARACTER(LEN=default_string_length) :: routineN
105 : INTEGER :: local_routine_id, sending_rank
106 9127 : INTEGER, ALLOCATABLE, DIMENSION(:) :: collected
107 : REAL(KIND=dp) :: foobar
108 : REAL(KIND=dp), DIMENSION(2) :: dbuf
109 : TYPE(routine_report_type), POINTER :: r_report
110 : TYPE(routine_stat_type), POINTER :: r_stat
111 : TYPE(timer_env_type), POINTER :: timer_env
112 :
113 9127 : NULLIFY (r_stat, r_report, timer_env)
114 9127 : IF (.NOT. list_isready(reports)) &
115 0 : CPABORT("BUG")
116 :
117 9127 : timer_env => get_timer_env()
118 :
119 : ! make sure all functions have been called so that list_size(timer_env%routine_stats)
120 : ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
121 : ! this hack makes sure they are called before
122 9127 : routineN = ""
123 9127 : CALL para_env%bcast(routineN, 0)
124 9127 : sending_rank = 0
125 9127 : CALL para_env%max(sending_rank)
126 9127 : CALL para_env%sum(sending_rank)
127 9127 : foobar = 0.0_dp
128 9127 : CALL para_env%max(foobar)
129 9127 : dbuf = 0.0_dp
130 9127 : CALL para_env%maxloc(dbuf)
131 9127 : CALL para_env%sum(foobar)
132 : ! end hack
133 :
134 : ! Array collected is used as a bit field.
135 : ! It's of type integer in order to use the convenient MINLOC routine.
136 27381 : ALLOCATE (collected(list_size(timer_env%routine_stats)))
137 3656592 : collected(:) = 0
138 :
139 3660564 : DO
140 : ! does any rank have uncollected stats?
141 3669691 : sending_rank = -1
142 751256412 : IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
143 3669691 : CALL para_env%max(sending_rank)
144 3669691 : IF (sending_rank < 0) EXIT ! every rank got all routines collected
145 3660564 : IF (sending_rank == para_env%mepos) THEN
146 899614472 : local_routine_id = MINLOC(collected, dim=1)
147 1853241 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
148 1853241 : routineN = r_stat%routineN
149 : END IF
150 3660564 : CALL para_env%bcast(routineN, sending_rank)
151 :
152 : ! Create new report for routineN
153 3660564 : ALLOCATE (r_report)
154 3660564 : CALL list_push(reports, r_report)
155 3660564 : r_report%routineN = routineN
156 :
157 : ! If routineN was called on local node, add local stats
158 3660564 : IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
159 3647465 : local_routine_id = routine_map_get(timer_env%routine_names, routineN)
160 3647465 : collected(local_routine_id) = 1
161 3647465 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
162 3647465 : r_report%max_total_calls = r_stat%total_calls
163 3647465 : r_report%sum_total_calls = r_stat%total_calls
164 3647465 : r_report%sum_stackdepth = r_stat%stackdepth_accu
165 3647465 : SELECT CASE (cost_type)
166 : CASE (cost_type_energy)
167 0 : r_report%max_icost = r_stat%incl_energy_accu
168 0 : r_report%sum_icost = r_stat%incl_energy_accu
169 0 : r_report%max_ecost = r_stat%excl_energy_accu
170 0 : r_report%sum_ecost = r_stat%excl_energy_accu
171 : CASE (cost_type_time)
172 3647465 : r_report%max_icost = r_stat%incl_walltime_accu
173 3647465 : r_report%sum_icost = r_stat%incl_walltime_accu
174 3647465 : r_report%max_ecost = r_stat%excl_walltime_accu
175 3647465 : r_report%sum_ecost = r_stat%excl_walltime_accu
176 : CASE DEFAULT
177 3647465 : CPABORT("BUG")
178 : END SELECT
179 : END IF
180 :
181 : ! collect stats of routineN via MPI
182 3660564 : CALL para_env%max(r_report%max_total_calls)
183 3660564 : CALL para_env%sum(r_report%sum_total_calls)
184 3660564 : CALL para_env%sum(r_report%sum_stackdepth)
185 :
186 : ! get value and rank of the maximum inclusive cost
187 10981692 : dbuf = (/r_report%max_icost, REAL(para_env%mepos, KIND=dp)/)
188 3660564 : CALL para_env%maxloc(dbuf)
189 3660564 : r_report%max_icost = dbuf(1)
190 3660564 : r_report%max_irank = INT(dbuf(2))
191 :
192 3660564 : CALL para_env%sum(r_report%sum_icost)
193 :
194 : ! get value and rank of the maximum exclusive cost
195 10981692 : dbuf = (/r_report%max_ecost, REAL(para_env%mepos, KIND=dp)/)
196 3660564 : CALL para_env%maxloc(dbuf)
197 3660564 : r_report%max_ecost = dbuf(1)
198 3660564 : r_report%max_erank = INT(dbuf(2))
199 :
200 3669691 : CALL para_env%sum(r_report%sum_ecost)
201 : END DO
202 :
203 9127 : END SUBROUTINE collect_reports_from_ranks
204 :
205 : ! **************************************************************************************************
206 : !> \brief Print the collected reports
207 : !> \param reports ...
208 : !> \param iw ...
209 : !> \param threshold ...
210 : !> \param sort_by_exclusiv_cost ...
211 : !> \param cost_type ...
212 : !> \param report_maxloc ...
213 : !> \param para_env ...
214 : !> \par History
215 : !> 01.2014 Refactored (Ole Schuett)
216 : !> \author JGH
217 : ! **************************************************************************************************
218 4667 : SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
219 : TYPE(list_routinereport_type), INTENT(IN) :: reports
220 : INTEGER, INTENT(IN) :: iw
221 : REAL(KIND=dp), INTENT(IN) :: threshold
222 : LOGICAL, INTENT(IN) :: sort_by_exclusiv_cost
223 : INTEGER, INTENT(IN) :: cost_type
224 : LOGICAL, INTENT(IN) :: report_maxloc
225 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
226 :
227 : CHARACTER(LEN=4) :: label
228 : CHARACTER(LEN=default_string_length) :: fmt, title
229 : INTEGER :: decimals, i, j, num_routines
230 4667 : INTEGER, ALLOCATABLE, DIMENSION(:) :: indices
231 : REAL(KIND=dp) :: asd, maxcost, mincost
232 4667 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: max_costs
233 : TYPE(routine_report_type), POINTER :: r_report_i, r_report_j
234 :
235 4667 : NULLIFY (r_report_i, r_report_j)
236 4667 : IF (.NOT. list_isready(reports)) &
237 0 : CPABORT("BUG")
238 :
239 : ! are we printing timing or energy ?
240 4667 : SELECT CASE (cost_type)
241 : CASE (cost_type_energy)
242 0 : title = "E N E R G Y"
243 0 : label = "ENER"
244 : CASE (cost_type_time)
245 4667 : title = "T I M I N G"
246 4667 : label = "TIME"
247 : CASE DEFAULT
248 4667 : CPABORT("BUG")
249 : END SELECT
250 :
251 : ! write banner
252 4667 : WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
253 4667 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
254 4667 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
255 4667 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
256 4667 : WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
257 4667 : IF (report_maxloc) THEN
258 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
259 0 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
260 : ELSE
261 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
262 4667 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
263 : END IF
264 :
265 : WRITE (UNIT=iw, FMT="(T33,A)") &
266 4667 : "MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM"
267 :
268 : ! sort statistics
269 4667 : num_routines = list_size(reports)
270 14001 : ALLOCATE (max_costs(num_routines))
271 1857908 : DO i = 1, num_routines
272 1853241 : r_report_i => list_get(reports, i)
273 1857908 : IF (sort_by_exclusiv_cost) THEN
274 1436 : max_costs(i) = r_report_i%max_ecost
275 : ELSE
276 1851805 : max_costs(i) = r_report_i%max_icost
277 : END IF
278 : END DO
279 14001 : ALLOCATE (indices(num_routines))
280 4667 : CALL sort(max_costs, num_routines, indices)
281 :
282 1862575 : maxcost = MAXVAL(max_costs)
283 4667 : mincost = maxcost*threshold
284 :
285 : ! adjust fmt dynamically based on the max walltime.
286 : ! few clocks have more than 3 digits resolution, so stop there
287 4667 : decimals = 3
288 4667 : IF (maxcost >= 10000) decimals = 2
289 0 : IF (maxcost >= 100000) decimals = 1
290 4667 : IF (maxcost >= 1000000) decimals = 0
291 4667 : IF (report_maxloc) THEN
292 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
293 0 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
294 : ELSE
295 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
296 4667 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
297 : END IF
298 :
299 : !write output
300 1857908 : DO i = num_routines, 1, -1
301 1857908 : IF (max_costs(i) >= mincost) THEN
302 257594 : j = indices(i)
303 257594 : r_report_j => list_get(reports, j)
304 : ! average stack depth
305 : asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
306 257594 : REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
307 257594 : IF (report_maxloc) THEN
308 : WRITE (UNIT=iw, FMT=fmt) &
309 0 : ADJUSTL(r_report_j%routineN(1:31)), &
310 0 : r_report_j%max_total_calls, &
311 0 : asd, &
312 0 : r_report_j%sum_ecost/para_env%num_pe, &
313 0 : r_report_j%max_ecost, &
314 0 : r_report_j%sum_icost/para_env%num_pe, &
315 0 : r_report_j%max_icost, &
316 0 : r_report_j%max_erank
317 : ELSE
318 : WRITE (UNIT=iw, FMT=fmt) &
319 257594 : ADJUSTL(r_report_j%routineN(1:31)), &
320 257594 : r_report_j%max_total_calls, &
321 257594 : asd, &
322 257594 : r_report_j%sum_ecost/para_env%num_pe, &
323 257594 : r_report_j%max_ecost, &
324 257594 : r_report_j%sum_icost/para_env%num_pe, &
325 515188 : r_report_j%max_icost
326 : END IF
327 : END IF
328 : END DO
329 4667 : WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
330 :
331 4667 : END SUBROUTINE print_reports
332 :
333 : ! **************************************************************************************************
334 : !> \brief Write accumulated callgraph information as cachegrind-file.
335 : !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
336 : !> \param filename ...
337 : !> \par History
338 : !> 12.2012 initial version[ole]
339 : !> \author Ole Schuett
340 : ! **************************************************************************************************
341 1 : SUBROUTINE timings_report_callgraph(filename)
342 : CHARACTER(len=*), INTENT(in) :: filename
343 :
344 : INTEGER, PARAMETER :: E = 1000, T = 100000
345 :
346 : INTEGER :: i, unit
347 : TYPE(call_stat_type), POINTER :: c_stat
348 1 : TYPE(callgraph_item_type), DIMENSION(:), POINTER :: ct_items
349 : TYPE(routine_stat_type), POINTER :: r_stat
350 : TYPE(timer_env_type), POINTER :: timer_env
351 :
352 : CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
353 1 : file_form="FORMATTED", unit_number=unit)
354 1 : timer_env => get_timer_env()
355 :
356 : ! use outermost routine as total runtime
357 1 : r_stat => list_get(timer_env%routine_stats, 1)
358 1 : WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
359 1 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
360 1 : INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
361 2 : INT(E*r_stat%incl_energy_accu, KIND=int_8)
362 :
363 454 : DO i = 1, list_size(timer_env%routine_stats)
364 453 : r_stat => list_get(timer_env%routine_stats, i)
365 453 : WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
366 453 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
367 453 : INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
368 907 : INT(E*r_stat%excl_energy_accu, KIND=int_8)
369 : END DO
370 :
371 1 : ct_items => callgraph_items(timer_env%callgraph)
372 780 : DO i = 1, SIZE(ct_items)
373 779 : c_stat => ct_items(i)%value
374 779 : WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
375 779 : WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
376 779 : WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
377 779 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
378 779 : INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
379 1559 : INT(E*c_stat%incl_energy_accu, KIND=int_8)
380 : END DO
381 1 : DEALLOCATE (ct_items)
382 :
383 1 : CALL close_file(unit_number=unit, file_status="KEEP")
384 :
385 1 : END SUBROUTINE timings_report_callgraph
386 : END MODULE timings_report
387 :
|