Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
/* Number of issues counted by the trace callbacks. */
static int openmp_trace_issues_n;
/* Active trace level; zero means that tracing is disabled. */
static int openmp_trace_level;

/* Prototype precedes the definition: the routine is bound from Fortran. */
int openmp_trace_issues(void);

/**
 * Returns the number of issues counted so far, or -1 if tracing is disabled
 * (trace level is zero).
 */
int openmp_trace_issues(void) {
  if (0 == openmp_trace_level) {
    return -1; /* disabled */
  }
  return openmp_trace_issues_n;
}
15 :
16 : #if defined(_OPENMP)
17 : /* #include <omp.h>: avoid functionality being traced */
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
24 :
25 : /**
26 : * Simple compile-time check if OMPT is available (omp/iomp, not gomp).
27 : * __clang__: omp and iomp/icx, __INTEL_COMPILER: iomp/icc
28 : * __INTEL_LLVM_COMPILER: already covered by __clang__
29 : */
30 : #if (defined(__clang__) || defined(__INTEL_COMPILER))
31 : #include <omp-tools.h>
32 : #else
/*
 * Minimal stand-in for <omp-tools.h> covering only what this file refers to.
 * Tool entry points and callbacks are declared as function-pointer types so
 * that functions are never converted to object pointers (void*), which ISO C
 * does not define.
 */
typedef struct ompt_frame_t ompt_frame_t;
typedef union ompt_data_t {
  uint64_t value;
  void *ptr;
} ompt_data_t;

/* generic function pointer as handed out by the lookup function */
typedef void (*ompt_interface_fn_t)(void);
typedef ompt_interface_fn_t (*ompt_function_lookup_t)(const char *);
/* generic callback; specific callbacks are cast to this type for registration */
typedef void (*ompt_callback_t)(void);
/* signatures agree with openmp_trace_initialize and openmp_trace_finalize */
typedef int (*ompt_initialize_t)(ompt_function_lookup_t, int, ompt_data_t *);
typedef void (*ompt_finalize_t)(ompt_data_t *);

typedef struct ompt_start_tool_result_t {
  ompt_initialize_t initialize;
  ompt_finalize_t finalize;
  ompt_data_t tool_data;
} ompt_start_tool_result_t;
typedef enum ompt_scope_endpoint_t {
  ompt_scope_begin = 1,
  ompt_scope_end,
  ompt_scope_beginend
} ompt_scope_endpoint_t;
typedef enum ompt_set_result_t {
  ompt_set_never = 1,
} ompt_set_result_t;
typedef enum ompt_callbacks_t {
  ompt_callback_parallel_begin = 3,
  ompt_callback_parallel_end = 4,
  ompt_callback_work = 20,
  ompt_callback_master = 21,
  ompt_callback_sync_region = 23,
} ompt_callbacks_t;
typedef enum ompt_parallel_flag_t {
  ompt_parallel_team = 0x80000000
} ompt_parallel_flag_t;
typedef enum ompt_sync_region_t {
  ompt_sync_region_barrier = 1,
  ompt_sync_region_barrier_implicit,
  ompt_sync_region_barrier_explicit,
  ompt_sync_region_barrier_implementation
} ompt_sync_region_t;
typedef enum ompt_work_t {
  ompt_work_loop = 1,
  ompt_work_sections,
  ompt_work_single_executor,
  ompt_work_single_other,
  ompt_work_workshare,
} ompt_work_t;

typedef int (*ompt_get_parallel_info_t)(int, ompt_data_t **, int *);
typedef ompt_set_result_t (*ompt_set_callback_t)(ompt_callbacks_t,
                                                 ompt_callback_t);
83 : #endif
84 :
85 : #if !defined(_WIN32) && !defined(__CYGWIN__) && !defined(OPENMP_TRACE_SYMBOL)
86 : #define OPENMP_TRACE_SYMBOL
87 : #include <execinfo.h>
88 : #include <unistd.h>
89 : #endif
90 :
/*
 * Tagged code addresses: OPENMP_TRACE_PTR keeps the kind of a construct
 * (4 bits) at bit 56 and above of the address. OPENMP_TRACE_PTR_KIND yields
 * the bits from 56 upwards, and OPENMP_TRACE_PTR_SYMBOL must therefore clear
 * all of them (not only bits 60..63) to recover the plain address.
 */
#define OPENMP_TRACE_PTR_KIND(PTR) ((int)(((uintptr_t)(PTR)) >> 56))
#define OPENMP_TRACE_PTR_SYMBOL(PTR)                                           \
  ((const void *)(((uintptr_t)0x00FFFFFFFFFFFFFF) & ((uintptr_t)(PTR))))
#define OPENMP_TRACE_PTR(PTR, KIND)                                            \
  ((const void *)((((uintptr_t)(0xF & (KIND))) << 56) |                        \
                  (uintptr_t)OPENMP_TRACE_PTR_SYMBOL(PTR)))
/*
 * Registers PREFIX_NAME for ompt_callback_NAME using a set_callback function
 * that must be in scope; counts an issue if the runtime answers
 * ompt_set_never. Wrapped in do/while so it acts as a single statement.
 */
#define OPENMP_TRACE_SET_CALLBACK(PREFIX, NAME)                                \
  do {                                                                         \
    if (ompt_set_never == set_callback(ompt_callback_##NAME,                   \
                                       (ompt_callback_t)PREFIX##_##NAME)) {    \
      ++openmp_trace_issues_n;                                                 \
    }                                                                          \
  } while (0)
/* Message on stderr; at least one argument must follow FORMAT. */
#define OPENMP_TRACE_PRINT(KIND, FORMAT, ...)                                  \
  fprintf(stderr, "OMP/TRACE %s: " FORMAT, KIND, __VA_ARGS__)
#define OPENMP_TRACE_UNUSED(VAR) (void)(VAR)
/* Optional features are compiled out: the argument is replaced by zero. */
#if 0
#define OPENMP_TRACE_ENABLE(FEATURE) (FEATURE)
#else
#define OPENMP_TRACE_ENABLE(FEATURE) 0
#endif
110 :
/* Thresholds which the trace level is compared against. */
enum {
  openmp_trace_level_deflt = 2,
  openmp_trace_level_high = 4,
  openmp_trace_level_warn = 5,
  openmp_trace_level_info = 6
};
117 :
118 : static int openmp_trace_parallel_n;
119 : static int openmp_trace_sync_n;
120 :
121 : static const void *openmp_trace_parallel;
122 : static ompt_data_t *openmp_trace_sync;
123 :
124 : static ompt_get_parallel_info_t openmp_trace_get_parallel_info;
125 :
/**
 * Translates a code address into a printable, NUL-terminated description.
 *
 * symbol:  code address to look up (NULL yields an empty string).
 * str:     destination buffer; nothing happens if NULL.
 * size:    capacity of str in bytes; nothing happens if zero.
 * cleanup: if non-zero and the description contains "(name+", then only
 *          "name" is kept.
 *
 * The result is empty if the address cannot be translated, if the platform
 * lacks support (OPENMP_TRACE_SYMBOL undefined), or if the description
 * contains non-printable characters.
 */
static void openmp_trace_symbol(const void *symbol, char *str, size_t size,
                                int cleanup) {
  if (NULL == str || 0 == size) {
    return;
  }
  *str = '\0'; /* result unless translation succeeds */
#if !defined(OPENMP_TRACE_SYMBOL)
  (void)symbol;
  (void)cleanup;
#else
  if (NULL != symbol) {
    int pipefd[2];
    if (0 == pipe(pipefd)) {
      void *const address[] = {(void *)(uintptr_t)symbol};
      ssize_t nbytes;
      /* the description is written into the pipe and read back as text */
      backtrace_symbols_fd(address, 1, pipefd[1]);
      close(pipefd[1]);
      /* keep one byte in reserve for the terminator */
      nbytes = read(pipefd[0], str, size - 1);
      close(pipefd[0]);
      if (0 < nbytes) {
        const size_t length = (size_t)nbytes;
        char *s, *t;
        str[length] = '\0';
        /* only search bytes which were actually read */
        s = (char *)(0 != cleanup ? memchr(str, '(', length) : NULL);
        t = (char *)(NULL != s
                         ? memchr(s + 1, '+', length - (size_t)(s + 1 - str))
                         : NULL);
        if (NULL != t) {
          *t = '\0';
          memmove(str, s + 1, (size_t)(t - s)); /* includes the terminator */
        }
        s = strchr(str, '\n');
        if (NULL != s) {
          *s = '\0';
        }
        for (s = str; '\0' != *s; ++s) {
          /* cast avoids undefined behavior for negative char values */
          if (0 == isprint((unsigned char)*s)) {
            *str = '\0';
            break;
          }
        }
      } else {
        *str = '\0';
      }
    }
  }
#endif
}
165 :
/**
 * Gives a name to a kind of synchronization construct.
 *
 * kind: 0 denotes master, 1..3 the barrier kinds of ompt_sync_region_t, and
 *       4..7 the work constructs (sections, single executor, single other,
 *       workshare) counted from ompt_sync_region_barrier_implementation.
 *
 * Returns "synchronization" for any other value. The range is checked
 * explicitly rather than by multiplying the index with the element size.
 */
static const char *openmp_trace_sync_name(int kind) {
  static const char *const kinds[] = {
      "master",   "barrier", "implicit barrier", "explicit barrier",
      "sections", "single",  "single",           "workshare"};
  const size_t nkinds = sizeof(kinds) / sizeof(*kinds);
  if (0 <= kind && (size_t)kind < nkinds) {
    return kinds[kind];
  }
  return "synchronization";
}
174 :
175 : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
176 0 : static void openmp_trace_parallel_begin(
177 : ompt_data_t *encountering_task_data,
178 : const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
179 : unsigned int requested_parallelism, int flags, const void *codeptr_ra) {
180 0 : OPENMP_TRACE_UNUSED(encountering_task_data);
181 0 : OPENMP_TRACE_UNUSED(encountering_task_frame);
182 0 : OPENMP_TRACE_UNUSED(parallel_data);
183 0 : OPENMP_TRACE_UNUSED(requested_parallelism);
184 0 : if (ompt_parallel_team & flags) {
185 0 : const ompt_data_t *sync;
186 : #pragma omp atomic read
187 0 : sync = openmp_trace_sync;
188 0 : if (NULL != sync) {
189 0 : const int kind = OPENMP_TRACE_PTR_KIND(sync->ptr);
190 0 : if (ompt_sync_region_barrier_implementation > kind) {
191 0 : ++openmp_trace_issues_n;
192 : }
193 0 : if (1 /*assert*/ < openmp_trace_level || 0 > openmp_trace_level) {
194 0 : const char *const type =
195 0 : (ompt_sync_region_barrier_implementation > kind ? "ERROR" : "WARN");
196 0 : if ('E' == *type || openmp_trace_level_warn <= openmp_trace_level) {
197 0 : const char *const name = openmp_trace_sync_name(kind);
198 0 : char symbol[1024], symbol2[1024];
199 0 : openmp_trace_symbol(codeptr_ra, symbol, sizeof(symbol),
200 : 1 /*cleanup*/);
201 0 : openmp_trace_symbol(OPENMP_TRACE_PTR_SYMBOL(sync->ptr), symbol2,
202 : sizeof(symbol2), 1 /*cleanup*/);
203 0 : if ('\0' != *symbol) {
204 0 : if ('\0' != *symbol2) {
205 0 : OPENMP_TRACE_PRINT(type,
206 : "parallel region \"%s\" opened in %s \"%s\"\n",
207 : symbol, name, symbol2);
208 : } else {
209 0 : OPENMP_TRACE_PRINT(type, "parallel region \"%s\" opened in %s\n",
210 : symbol, name);
211 : }
212 : } else {
213 0 : if ('\0' != *symbol2) {
214 0 : OPENMP_TRACE_PRINT(type, "parallel region opened in %s \"%s\"\n",
215 : name, symbol2);
216 : } else {
217 0 : OPENMP_TRACE_PRINT(type, "parallel region opened in %s\n", name);
218 : }
219 : }
220 : }
221 : } else {
222 0 : assert(0);
223 : }
224 : }
225 : }
226 0 : }
227 :
228 : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
229 0 : static void openmp_trace_parallel_end(ompt_data_t *parallel_data,
230 : ompt_data_t *encountering_task_data,
231 : int flags, const void *codeptr_ra) {
232 0 : OPENMP_TRACE_UNUSED(parallel_data);
233 0 : OPENMP_TRACE_UNUSED(encountering_task_data);
234 0 : if (0 != (ompt_parallel_team & flags) &&
235 0 : 0 != openmp_trace_get_parallel_info(openmp_trace_parallel_n + 1, NULL,
236 : NULL)) {
237 0 : openmp_trace_parallel = codeptr_ra;
238 0 : ++openmp_trace_parallel_n;
239 : }
240 0 : }
241 :
242 : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
243 0 : static void openmp_trace_master(ompt_scope_endpoint_t endpoint,
244 : ompt_data_t *parallel_data,
245 : ompt_data_t *task_data,
246 : const void *codeptr_ra) {
247 0 : OPENMP_TRACE_UNUSED(task_data);
248 0 : if (NULL != parallel_data) {
249 0 : int sync_n;
250 0 : switch ((int)endpoint) {
251 0 : case OPENMP_TRACE_ENABLE(ompt_scope_beginend):
252 : case ompt_scope_begin: {
253 0 : if (OPENMP_TRACE_ENABLE(ompt_scope_beginend) != endpoint) {
254 0 : #pragma omp atomic capture
255 : sync_n = openmp_trace_sync_n++;
256 : } else {
257 : #pragma omp atomic read
258 0 : sync_n = openmp_trace_sync_n;
259 : }
260 0 : if (0 == sync_n) {
261 0 : assert(OPENMP_TRACE_PTR(codeptr_ra, 0) == codeptr_ra);
262 0 : parallel_data->ptr = (void *)(uintptr_t)codeptr_ra;
263 0 : openmp_trace_sync = parallel_data;
264 : }
265 : } break;
266 0 : case ompt_scope_end: {
267 0 : #pragma omp atomic capture
268 : sync_n = --openmp_trace_sync_n;
269 0 : if (0 == sync_n) {
270 0 : openmp_trace_sync = NULL;
271 : }
272 : } break;
273 : }
274 : }
275 0 : }
276 :
277 : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
278 0 : void openmp_trace_sync_region(ompt_sync_region_t kind,
279 : ompt_scope_endpoint_t endpoint,
280 : ompt_data_t *parallel_data,
281 : ompt_data_t *task_data, const void *codeptr_ra) {
282 0 : OPENMP_TRACE_UNUSED(task_data);
283 0 : assert(0 < kind);
284 0 : if (NULL != parallel_data && ompt_sync_region_barrier_implementation > kind) {
285 0 : int sync_n;
286 0 : switch ((int)endpoint) {
287 0 : case OPENMP_TRACE_ENABLE(ompt_scope_beginend):
288 : case ompt_scope_begin: {
289 0 : if (OPENMP_TRACE_ENABLE(ompt_scope_beginend) != endpoint) {
290 0 : #pragma omp atomic capture
291 : sync_n = openmp_trace_sync_n++;
292 : } else {
293 : #pragma omp atomic read
294 0 : sync_n = openmp_trace_sync_n;
295 : }
296 0 : if (0 == sync_n) {
297 0 : assert(OPENMP_TRACE_PTR(codeptr_ra, 0) == codeptr_ra);
298 0 : parallel_data->ptr =
299 0 : (void *)(uintptr_t)OPENMP_TRACE_PTR(codeptr_ra, kind);
300 0 : openmp_trace_sync = parallel_data;
301 0 : } else if (openmp_trace_level_warn <= openmp_trace_level ||
302 : 0 > openmp_trace_level) {
303 0 : const ompt_data_t *sync;
304 : #pragma omp atomic read
305 0 : sync = openmp_trace_sync;
306 0 : if (NULL != sync && parallel_data != sync) {
307 0 : const char *const name = openmp_trace_sync_name(kind);
308 0 : char symbol[1024], symbol2[1024];
309 0 : openmp_trace_symbol(codeptr_ra, symbol, sizeof(symbol),
310 : 1 /*cleanup*/);
311 0 : openmp_trace_symbol(OPENMP_TRACE_PTR_SYMBOL(sync->ptr), symbol2,
312 : sizeof(symbol2), 1 /*cleanup*/);
313 0 : if ('\0' != *symbol) {
314 0 : if ('\0' != *symbol2) {
315 0 : OPENMP_TRACE_PRINT("WARN",
316 : "potential deadlock at \"%s\" in %s \"%s\"\n",
317 : symbol2, name, symbol);
318 : } else {
319 0 : OPENMP_TRACE_PRINT("WARN", "potential deadlock in %s \"%s\"\n",
320 : name, symbol);
321 : }
322 : } else {
323 0 : if ('\0' != *symbol2) {
324 0 : OPENMP_TRACE_PRINT("WARN", "potential deadlock at \"%s\" in %s\n",
325 : symbol2, name);
326 : } else {
327 0 : OPENMP_TRACE_PRINT("WARN", "potential deadlock in %s\n", name);
328 : }
329 : }
330 : }
331 : }
332 : } break;
333 0 : case ompt_scope_end: {
334 0 : #pragma omp atomic capture
335 : sync_n = --openmp_trace_sync_n;
336 0 : if (0 == sync_n) {
337 0 : openmp_trace_sync = NULL;
338 : }
339 : } break;
340 : }
341 : }
342 0 : }
343 :
344 : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
345 0 : static void openmp_trace_work(ompt_work_t wstype,
346 : ompt_scope_endpoint_t endpoint,
347 : ompt_data_t *parallel_data,
348 : ompt_data_t *task_data, uint64_t count,
349 : const void *codeptr_ra) {
350 0 : OPENMP_TRACE_UNUSED(task_data);
351 0 : OPENMP_TRACE_UNUSED(count);
352 0 : assert(0 < wstype);
353 0 : if (NULL != parallel_data && ompt_work_sections <= wstype &&
354 : wstype <= ompt_work_workshare) {
355 0 : int sync_n;
356 0 : switch ((int)endpoint) {
357 0 : case OPENMP_TRACE_ENABLE(ompt_scope_beginend):
358 : case ompt_scope_begin: {
359 0 : if (OPENMP_TRACE_ENABLE(ompt_scope_beginend) != endpoint) {
360 0 : #pragma omp atomic capture
361 : sync_n = openmp_trace_sync_n++;
362 : } else {
363 : #pragma omp atomic read
364 0 : sync_n = openmp_trace_sync_n;
365 : }
366 0 : if (0 == sync_n) {
367 0 : const int kind = wstype - ompt_work_sections +
368 : ompt_sync_region_barrier_implementation;
369 0 : assert(OPENMP_TRACE_PTR(codeptr_ra, 0) == codeptr_ra);
370 0 : parallel_data->ptr =
371 0 : (void *)(uintptr_t)OPENMP_TRACE_PTR(codeptr_ra, kind);
372 0 : openmp_trace_sync = parallel_data;
373 : }
374 : } break;
375 0 : case ompt_scope_end: {
376 0 : #pragma omp atomic capture
377 : sync_n = --openmp_trace_sync_n;
378 0 : if (0 == sync_n) {
379 0 : openmp_trace_sync = NULL;
380 : }
381 : } break;
382 : }
383 : }
384 0 : }
385 :
386 : /* initially, events of interest are registered */
387 0 : static int openmp_trace_initialize(ompt_function_lookup_t lookup,
388 : int initial_device_num,
389 : ompt_data_t *tool_data) {
390 0 : const ompt_set_callback_t set_callback =
391 0 : (ompt_set_callback_t)lookup("ompt_set_callback");
392 0 : openmp_trace_get_parallel_info =
393 0 : (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
394 0 : OPENMP_TRACE_UNUSED(initial_device_num);
395 0 : OPENMP_TRACE_UNUSED(tool_data);
396 0 : OPENMP_TRACE_SET_CALLBACK(openmp_trace, parallel_begin);
397 0 : OPENMP_TRACE_SET_CALLBACK(openmp_trace, parallel_end);
398 0 : OPENMP_TRACE_SET_CALLBACK(openmp_trace, master);
399 0 : if (openmp_trace_level_deflt < openmp_trace_level || 0 > openmp_trace_level) {
400 0 : OPENMP_TRACE_SET_CALLBACK(openmp_trace, sync_region);
401 : }
402 0 : if (openmp_trace_level_high <= openmp_trace_level || 0 > openmp_trace_level) {
403 0 : OPENMP_TRACE_SET_CALLBACK(openmp_trace, work);
404 : }
405 0 : assert(NULL != openmp_trace_get_parallel_info);
406 0 : return 0 == openmp_trace_issues();
407 : }
408 :
409 : /* here tool_data might be freed and analysis concludes */
410 0 : static void openmp_trace_finalize(ompt_data_t *tool_data) {
411 0 : OPENMP_TRACE_UNUSED(tool_data);
412 0 : if (openmp_trace_level_info <= openmp_trace_level || 0 > openmp_trace_level) {
413 0 : if (1 < openmp_trace_parallel_n) { /* nested */
414 0 : char symbol[1024];
415 0 : openmp_trace_symbol(openmp_trace_parallel, symbol, sizeof(symbol),
416 : 1 /*cleanup*/);
417 0 : if ('\0' != *symbol) {
418 0 : OPENMP_TRACE_PRINT("INFO", "parallelism in \"%s\" is nested (%i)\n",
419 : symbol, openmp_trace_parallel_n);
420 : } else {
421 0 : OPENMP_TRACE_PRINT("INFO", "parallelism is nested (%i)\n",
422 : openmp_trace_parallel_n);
423 : }
424 : }
425 : }
426 0 : }
427 :
428 : /* entry point which is automatically called by the OpenMP runtime */
429 0 : ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
430 : const char *runtime_version) {
431 0 : static ompt_start_tool_result_t openmp_start_tool;
432 0 : const char *const enabled_env = getenv("CP2K_OMP_TRACE");
433 0 : ompt_start_tool_result_t *result = NULL;
434 0 : openmp_trace_level = (NULL == enabled_env ? 0 : atoi(enabled_env));
435 0 : OPENMP_TRACE_UNUSED(omp_version);
436 0 : OPENMP_TRACE_UNUSED(runtime_version);
437 0 : if (0 != openmp_trace_level) { /* trace OpenMP constructs */
438 0 : openmp_start_tool.initialize = (ompt_initialize_t)openmp_trace_initialize;
439 0 : openmp_start_tool.finalize = (ompt_finalize_t)openmp_trace_finalize;
440 0 : openmp_start_tool.tool_data.ptr = NULL;
441 0 : result = &openmp_start_tool;
442 : #if defined(NDEBUG)
443 : if (1 == openmp_trace_level) {
444 : openmp_trace_level = 2; /* adjust trace level */
445 : }
446 : #endif
447 : }
448 0 : return result;
449 : }
450 :
451 : #endif
|