LCOV - code coverage report
Current view: top level - src/base - openmp_trace.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:2fce0f8) Lines: 2 182 1.1 %
Date: 2024-12-21 06:28:57 Functions: 1 11 9.1 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : static int openmp_trace_issues_n;
       9             : static int openmp_trace_level;
      10             : 
      11             : int openmp_trace_issues(void);
      12        9978 : int openmp_trace_issues(void) { /* routine is exposed in Fortran interface */
      13        9978 :   return 0 != openmp_trace_level ? openmp_trace_issues_n : -1 /*disabled*/;
      14             : }
      15             : 
      16             : #if defined(_OPENMP)
      17             : /* #include <omp.h>: avoid functionality being traced */
      18             : #include <assert.h>
      19             : #include <ctype.h>
      20             : #include <stdint.h>
      21             : #include <stdio.h>
      22             : #include <stdlib.h>
      23             : #include <string.h>
      24             : 
      25             : /**
      26             :  * Simple compile-time check if OMPT is available (omp/iomp, not gomp).
      27             :  * __clang__: omp and iomp/icx, __INTEL_COMPILER: iomp/icc
      28             :  * __INTEL_LLVM_COMPILER: already covered by __clang__
      29             :  */
      30             : #if (defined(__clang__) || defined(__INTEL_COMPILER))
      31             : #include <omp-tools.h>
      32             : #else
      33             : typedef struct ompt_frame_t ompt_frame_t;
      34             : typedef void *ompt_initialize_t;
      35             : typedef void *ompt_finalize_t;
      36             : typedef void *ompt_callback_t;
      37             : typedef union ompt_data_t {
      38             :   uint64_t value;
      39             :   void *ptr;
      40             : } ompt_data_t;
      41             : typedef struct ompt_start_tool_result_t {
      42             :   ompt_initialize_t initialize;
      43             :   ompt_finalize_t finalize;
      44             :   ompt_data_t tool_data;
      45             : } ompt_start_tool_result_t;
      46             : typedef enum ompt_scope_endpoint_t {
      47             :   ompt_scope_begin = 1,
      48             :   ompt_scope_end,
      49             :   ompt_scope_beginend
      50             : } ompt_scope_endpoint_t;
      51             : typedef enum ompt_set_result_t {
      52             :   ompt_set_never = 1,
      53             : } ompt_set_result_t;
      54             : typedef enum ompt_callbacks_t {
      55             :   ompt_callback_parallel_begin = 3,
      56             :   ompt_callback_parallel_end = 4,
      57             :   ompt_callback_work = 20,
      58             :   ompt_callback_master = 21,
      59             :   ompt_callback_sync_region = 23,
      60             : } ompt_callbacks_t;
      61             : typedef enum ompt_parallel_flag_t {
      62             :   ompt_parallel_team = 0x80000000
      63             : } ompt_parallel_flag_t;
      64             : typedef enum ompt_sync_region_t {
      65             :   ompt_sync_region_barrier = 1,
      66             :   ompt_sync_region_barrier_implicit,
      67             :   ompt_sync_region_barrier_explicit,
      68             :   ompt_sync_region_barrier_implementation
      69             : } ompt_sync_region_t;
      70             : typedef enum ompt_work_t {
      71             :   ompt_work_loop = 1,
      72             :   ompt_work_sections,
      73             :   ompt_work_single_executor,
      74             :   ompt_work_single_other,
      75             :   ompt_work_workshare,
      76             : } ompt_work_t;
      77             : 
      78             : typedef void (*ompt_interface_fn_t)(void);
      79             : typedef int (*ompt_get_parallel_info_t)(int, ompt_data_t **, int *);
      80             : typedef ompt_interface_fn_t (*ompt_function_lookup_t)(const char *);
      81             : typedef ompt_set_result_t (*ompt_set_callback_t)(ompt_callbacks_t,
      82             :                                                  ompt_callback_t);
      83             : #endif
      84             : 
      85             : #if !defined(_WIN32) && !defined(__CYGWIN__) && !defined(OPENMP_TRACE_SYMBOL)
      86             : #define OPENMP_TRACE_SYMBOL
      87             : #include <execinfo.h>
      88             : #include <unistd.h>
      89             : #endif
      90             : 
      91             : #define OPENMP_TRACE_PTR_KIND(PTR) (int)(((uintptr_t)(PTR)) >> 56)
      92             : #define OPENMP_TRACE_PTR_SYMBOL(PTR)                                           \
      93             :   (const void *)(0x0FFFFFFFFFFFFFFF & ((uintptr_t)(PTR)))
      94             : #define OPENMP_TRACE_PTR(PTR, KIND)                                            \
      95             :   (const void *)((((uintptr_t)(0xF & (KIND))) << 56) |                         \
      96             :                  (uintptr_t)OPENMP_TRACE_PTR_SYMBOL(PTR))
      97             : #define OPENMP_TRACE_SET_CALLBACK(PREFIX, NAME)                                \
      98             :   if (ompt_set_never ==                                                        \
      99             :       set_callback(ompt_callback_##NAME, (ompt_callback_t)PREFIX##_##NAME)) {  \
     100             :     ++openmp_trace_issues_n;                                                   \
     101             :   }
     102             : #define OPENMP_TRACE_PRINT(KIND, FORMAT, ...)                                  \
     103             :   fprintf(stderr, "OMP/TRACE %s: " FORMAT, KIND, __VA_ARGS__)
     104             : #define OPENMP_TRACE_UNUSED(VAR) (void)VAR
     105             : #if 0
     106             : #define OPENMP_TRACE_ENABLE(FEATURE) (FEATURE)
     107             : #else
     108             : #define OPENMP_TRACE_ENABLE(FEATURE) 0
     109             : #endif
     110             : 
     111             : enum {
     112             :   openmp_trace_level_deflt = 2,
     113             :   openmp_trace_level_high = 4,
     114             :   openmp_trace_level_warn,
     115             :   openmp_trace_level_info
     116             : };
     117             : 
     118             : static int openmp_trace_parallel_n;
     119             : static int openmp_trace_sync_n;
     120             : 
     121             : static const void *openmp_trace_parallel;
     122             : static ompt_data_t *openmp_trace_sync;
     123             : 
     124             : static ompt_get_parallel_info_t openmp_trace_get_parallel_info;
     125             : 
     126             : /* translate debug symbol (address) to character string */
     127           0 : static void openmp_trace_symbol(const void *symbol, char *str, size_t size,
     128             :                                 int cleanup) {
     129           0 :   if (NULL != str && 0 < size) {
     130             : #if !defined(OPENMP_TRACE_SYMBOL)
     131             :     OPENMP_TRACE_UNUSED(symbol);
     132             : #else
     133           0 :     int pipefd[2];
     134           0 :     if (NULL != symbol && 0 == pipe(pipefd)) {
     135           0 :       void *const backtrace[] = {(void *)(uintptr_t)symbol};
     136           0 :       backtrace_symbols_fd(backtrace, 1, pipefd[1]);
     137           0 :       close(pipefd[1]);
     138           0 :       if (0 < read(pipefd[0], str, size)) {
     139           0 :         char *s = (char *)(0 != cleanup ? memchr(str, '(', size) : NULL);
     140           0 :         char *t =
     141           0 :             (char *)(NULL != s ? memchr(s + 1, '+', size - (s - str)) : NULL);
     142           0 :         if (NULL != t) {
     143           0 :           *t = '\0';
     144           0 :           memmove(str, s + 1, t - s);
     145             :         }
     146           0 :         s = (char *)memchr(str, '\n', size);
     147           0 :         if (NULL != s) {
     148           0 :           *s = '\0';
     149             :         }
     150           0 :         for (s = str; s < (str + size) && '\0' != *s; ++s) {
     151           0 :           if (0 == isprint(*s)) {
     152           0 :             *str = '\0';
     153           0 :             break;
     154             :           }
     155             :         }
     156             :       } else {
     157           0 :         *str = '\0';
     158             :       }
     159           0 :       close(pipefd[0]);
     160             :     } else
     161             : #endif
     162           0 :     { *str = '\0'; }
     163             :   }
     164           0 : }
     165             : 
     166             : /* give a name to a kind of synchronization construct */
     167           0 : static const char *openmp_trace_sync_name(int kind) {
     168           0 :   static const char *kinds[] = {
     169             :       "master",   "barrier", "implicit barrier", "explicit barrier",
     170             :       "sections", "single",  "single",           "workshare"};
     171           0 :   return (kind * sizeof(*kinds)) < sizeof(kinds) ? kinds[kind]
     172           0 :                                                  : "synchronization";
     173             : }
     174             : 
     175             : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
     176           0 : static void openmp_trace_parallel_begin(
     177             :     ompt_data_t *encountering_task_data,
     178             :     const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
     179             :     unsigned int requested_parallelism, int flags, const void *codeptr_ra) {
     180           0 :   OPENMP_TRACE_UNUSED(encountering_task_data);
     181           0 :   OPENMP_TRACE_UNUSED(encountering_task_frame);
     182           0 :   OPENMP_TRACE_UNUSED(parallel_data);
     183           0 :   OPENMP_TRACE_UNUSED(requested_parallelism);
     184           0 :   if (ompt_parallel_team & flags) {
     185           0 :     const ompt_data_t *sync;
     186             : #pragma omp atomic read
     187           0 :     sync = openmp_trace_sync;
     188           0 :     if (NULL != sync) {
     189           0 :       const int kind = OPENMP_TRACE_PTR_KIND(sync->ptr);
     190           0 :       if (ompt_sync_region_barrier_implementation > kind) {
     191           0 :         ++openmp_trace_issues_n;
     192             :       }
     193           0 :       if (1 /*assert*/ < openmp_trace_level || 0 > openmp_trace_level) {
     194           0 :         const char *const type =
     195           0 :             (ompt_sync_region_barrier_implementation > kind ? "ERROR" : "WARN");
     196           0 :         if ('E' == *type || openmp_trace_level_warn <= openmp_trace_level) {
     197           0 :           const char *const name = openmp_trace_sync_name(kind);
     198           0 :           char symbol[1024], symbol2[1024];
     199           0 :           openmp_trace_symbol(codeptr_ra, symbol, sizeof(symbol),
     200             :                               1 /*cleanup*/);
     201           0 :           openmp_trace_symbol(OPENMP_TRACE_PTR_SYMBOL(sync->ptr), symbol2,
     202             :                               sizeof(symbol2), 1 /*cleanup*/);
     203           0 :           if ('\0' != *symbol) {
     204           0 :             if ('\0' != *symbol2) {
     205           0 :               OPENMP_TRACE_PRINT(type,
     206             :                                  "parallel region \"%s\" opened in %s \"%s\"\n",
     207             :                                  symbol, name, symbol2);
     208             :             } else {
     209           0 :               OPENMP_TRACE_PRINT(type, "parallel region \"%s\" opened in %s\n",
     210             :                                  symbol, name);
     211             :             }
     212             :           } else {
     213           0 :             if ('\0' != *symbol2) {
     214           0 :               OPENMP_TRACE_PRINT(type, "parallel region opened in %s \"%s\"\n",
     215             :                                  name, symbol2);
     216             :             } else {
     217           0 :               OPENMP_TRACE_PRINT(type, "parallel region opened in %s\n", name);
     218             :             }
     219             :           }
     220             :         }
     221             :       } else {
     222           0 :         assert(0);
     223             :       }
     224             :     }
     225             :   }
     226           0 : }
     227             : 
     228             : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
     229           0 : static void openmp_trace_parallel_end(ompt_data_t *parallel_data,
     230             :                                       ompt_data_t *encountering_task_data,
     231             :                                       int flags, const void *codeptr_ra) {
     232           0 :   OPENMP_TRACE_UNUSED(parallel_data);
     233           0 :   OPENMP_TRACE_UNUSED(encountering_task_data);
     234           0 :   if (0 != (ompt_parallel_team & flags) &&
     235           0 :       0 != openmp_trace_get_parallel_info(openmp_trace_parallel_n + 1, NULL,
     236             :                                           NULL)) {
     237           0 :     openmp_trace_parallel = codeptr_ra;
     238           0 :     ++openmp_trace_parallel_n;
     239             :   }
     240           0 : }
     241             : 
     242             : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
     243           0 : static void openmp_trace_master(ompt_scope_endpoint_t endpoint,
     244             :                                 ompt_data_t *parallel_data,
     245             :                                 ompt_data_t *task_data,
     246             :                                 const void *codeptr_ra) {
     247           0 :   OPENMP_TRACE_UNUSED(task_data);
     248           0 :   if (NULL != parallel_data) {
     249           0 :     int sync_n;
     250           0 :     switch ((int)endpoint) {
     251           0 :     case OPENMP_TRACE_ENABLE(ompt_scope_beginend):
     252             :     case ompt_scope_begin: {
     253           0 :       if (OPENMP_TRACE_ENABLE(ompt_scope_beginend) != endpoint) {
     254           0 : #pragma omp atomic capture
     255             :         sync_n = openmp_trace_sync_n++;
     256             :       } else {
     257             : #pragma omp atomic read
     258           0 :         sync_n = openmp_trace_sync_n;
     259             :       }
     260           0 :       if (0 == sync_n) {
     261           0 :         assert(OPENMP_TRACE_PTR(codeptr_ra, 0) == codeptr_ra);
     262           0 :         parallel_data->ptr = (void *)(uintptr_t)codeptr_ra;
     263           0 :         openmp_trace_sync = parallel_data;
     264             :       }
     265             :     } break;
     266           0 :     case ompt_scope_end: {
     267           0 : #pragma omp atomic capture
     268             :       sync_n = --openmp_trace_sync_n;
     269           0 :       if (0 == sync_n) {
     270           0 :         openmp_trace_sync = NULL;
     271             :       }
     272             :     } break;
     273             :     }
     274             :   }
     275           0 : }
     276             : 
     277             : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
     278           0 : void openmp_trace_sync_region(ompt_sync_region_t kind,
     279             :                               ompt_scope_endpoint_t endpoint,
     280             :                               ompt_data_t *parallel_data,
     281             :                               ompt_data_t *task_data, const void *codeptr_ra) {
     282           0 :   OPENMP_TRACE_UNUSED(task_data);
     283           0 :   assert(0 < kind);
     284           0 :   if (NULL != parallel_data && ompt_sync_region_barrier_implementation > kind) {
     285           0 :     int sync_n;
     286           0 :     switch ((int)endpoint) {
     287           0 :     case OPENMP_TRACE_ENABLE(ompt_scope_beginend):
     288             :     case ompt_scope_begin: {
     289           0 :       if (OPENMP_TRACE_ENABLE(ompt_scope_beginend) != endpoint) {
     290           0 : #pragma omp atomic capture
     291             :         sync_n = openmp_trace_sync_n++;
     292             :       } else {
     293             : #pragma omp atomic read
     294           0 :         sync_n = openmp_trace_sync_n;
     295             :       }
     296           0 :       if (0 == sync_n) {
     297           0 :         assert(OPENMP_TRACE_PTR(codeptr_ra, 0) == codeptr_ra);
     298           0 :         parallel_data->ptr =
     299           0 :             (void *)(uintptr_t)OPENMP_TRACE_PTR(codeptr_ra, kind);
     300           0 :         openmp_trace_sync = parallel_data;
     301           0 :       } else if (openmp_trace_level_warn <= openmp_trace_level ||
     302             :                  0 > openmp_trace_level) {
     303           0 :         const ompt_data_t *sync;
     304             : #pragma omp atomic read
     305           0 :         sync = openmp_trace_sync;
     306           0 :         if (NULL != sync && parallel_data != sync) {
     307           0 :           const char *const name = openmp_trace_sync_name(kind);
     308           0 :           char symbol[1024], symbol2[1024];
     309           0 :           openmp_trace_symbol(codeptr_ra, symbol, sizeof(symbol),
     310             :                               1 /*cleanup*/);
     311           0 :           openmp_trace_symbol(OPENMP_TRACE_PTR_SYMBOL(sync->ptr), symbol2,
     312             :                               sizeof(symbol2), 1 /*cleanup*/);
     313           0 :           if ('\0' != *symbol) {
     314           0 :             if ('\0' != *symbol2) {
     315           0 :               OPENMP_TRACE_PRINT("WARN",
     316             :                                  "potential deadlock at \"%s\" in %s \"%s\"\n",
     317             :                                  symbol2, name, symbol);
     318             :             } else {
     319           0 :               OPENMP_TRACE_PRINT("WARN", "potential deadlock in %s \"%s\"\n",
     320             :                                  name, symbol);
     321             :             }
     322             :           } else {
     323           0 :             if ('\0' != *symbol2) {
     324           0 :               OPENMP_TRACE_PRINT("WARN", "potential deadlock at \"%s\" in %s\n",
     325             :                                  symbol2, name);
     326             :             } else {
     327           0 :               OPENMP_TRACE_PRINT("WARN", "potential deadlock in %s\n", name);
     328             :             }
     329             :           }
     330             :         }
     331             :       }
     332             :     } break;
     333           0 :     case ompt_scope_end: {
     334           0 : #pragma omp atomic capture
     335             :       sync_n = --openmp_trace_sync_n;
     336           0 :       if (0 == sync_n) {
     337           0 :         openmp_trace_sync = NULL;
     338             :       }
     339             :     } break;
     340             :     }
     341             :   }
     342           0 : }
     343             : 
     344             : /* https://www.openmp.org/spec-html/5.0/openmpsu187.html */
     345           0 : static void openmp_trace_work(ompt_work_t wstype,
     346             :                               ompt_scope_endpoint_t endpoint,
     347             :                               ompt_data_t *parallel_data,
     348             :                               ompt_data_t *task_data, uint64_t count,
     349             :                               const void *codeptr_ra) {
     350           0 :   OPENMP_TRACE_UNUSED(task_data);
     351           0 :   OPENMP_TRACE_UNUSED(count);
     352           0 :   assert(0 < wstype);
     353           0 :   if (NULL != parallel_data && ompt_work_sections <= wstype &&
     354             :       wstype <= ompt_work_workshare) {
     355           0 :     int sync_n;
     356           0 :     switch ((int)endpoint) {
     357           0 :     case OPENMP_TRACE_ENABLE(ompt_scope_beginend):
     358             :     case ompt_scope_begin: {
     359           0 :       if (OPENMP_TRACE_ENABLE(ompt_scope_beginend) != endpoint) {
     360           0 : #pragma omp atomic capture
     361             :         sync_n = openmp_trace_sync_n++;
     362             :       } else {
     363             : #pragma omp atomic read
     364           0 :         sync_n = openmp_trace_sync_n;
     365             :       }
     366           0 :       if (0 == sync_n) {
     367           0 :         const int kind = wstype - ompt_work_sections +
     368             :                          ompt_sync_region_barrier_implementation;
     369           0 :         assert(OPENMP_TRACE_PTR(codeptr_ra, 0) == codeptr_ra);
     370           0 :         parallel_data->ptr =
     371           0 :             (void *)(uintptr_t)OPENMP_TRACE_PTR(codeptr_ra, kind);
     372           0 :         openmp_trace_sync = parallel_data;
     373             :       }
     374             :     } break;
     375           0 :     case ompt_scope_end: {
     376           0 : #pragma omp atomic capture
     377             :       sync_n = --openmp_trace_sync_n;
     378           0 :       if (0 == sync_n) {
     379           0 :         openmp_trace_sync = NULL;
     380             :       }
     381             :     } break;
     382             :     }
     383             :   }
     384           0 : }
     385             : 
     386             : /* initially, events of interest are registered */
     387           0 : static int openmp_trace_initialize(ompt_function_lookup_t lookup,
     388             :                                    int initial_device_num,
     389             :                                    ompt_data_t *tool_data) {
     390           0 :   const ompt_set_callback_t set_callback =
     391           0 :       (ompt_set_callback_t)lookup("ompt_set_callback");
     392           0 :   openmp_trace_get_parallel_info =
     393           0 :       (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
     394           0 :   OPENMP_TRACE_UNUSED(initial_device_num);
     395           0 :   OPENMP_TRACE_UNUSED(tool_data);
     396           0 :   OPENMP_TRACE_SET_CALLBACK(openmp_trace, parallel_begin);
     397           0 :   OPENMP_TRACE_SET_CALLBACK(openmp_trace, parallel_end);
     398           0 :   OPENMP_TRACE_SET_CALLBACK(openmp_trace, master);
     399           0 :   if (openmp_trace_level_deflt < openmp_trace_level || 0 > openmp_trace_level) {
     400           0 :     OPENMP_TRACE_SET_CALLBACK(openmp_trace, sync_region);
     401             :   }
     402           0 :   if (openmp_trace_level_high <= openmp_trace_level || 0 > openmp_trace_level) {
     403           0 :     OPENMP_TRACE_SET_CALLBACK(openmp_trace, work);
     404             :   }
     405           0 :   assert(NULL != openmp_trace_get_parallel_info);
     406           0 :   return 0 == openmp_trace_issues();
     407             : }
     408             : 
     409             : /* here tool_data might be freed and analysis concludes */
     410           0 : static void openmp_trace_finalize(ompt_data_t *tool_data) {
     411           0 :   OPENMP_TRACE_UNUSED(tool_data);
     412           0 :   if (openmp_trace_level_info <= openmp_trace_level || 0 > openmp_trace_level) {
     413           0 :     if (1 < openmp_trace_parallel_n) { /* nested */
     414           0 :       char symbol[1024];
     415           0 :       openmp_trace_symbol(openmp_trace_parallel, symbol, sizeof(symbol),
     416             :                           1 /*cleanup*/);
     417           0 :       if ('\0' != *symbol) {
     418           0 :         OPENMP_TRACE_PRINT("INFO", "parallelism in \"%s\" is nested (%i)\n",
     419             :                            symbol, openmp_trace_parallel_n);
     420             :       } else {
     421           0 :         OPENMP_TRACE_PRINT("INFO", "parallelism is nested (%i)\n",
     422             :                            openmp_trace_parallel_n);
     423             :       }
     424             :     }
     425             :   }
     426           0 : }
     427             : 
     428             : /* entry point which is automatically called by the OpenMP runtime */
     429           0 : ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
     430             :                                           const char *runtime_version) {
     431           0 :   static ompt_start_tool_result_t openmp_start_tool;
     432           0 :   const char *const enabled_env = getenv("CP2K_OMP_TRACE");
     433           0 :   ompt_start_tool_result_t *result = NULL;
     434           0 :   openmp_trace_level = (NULL == enabled_env ? 0 : atoi(enabled_env));
     435           0 :   OPENMP_TRACE_UNUSED(omp_version);
     436           0 :   OPENMP_TRACE_UNUSED(runtime_version);
     437           0 :   if (0 != openmp_trace_level) { /* trace OpenMP constructs */
     438           0 :     openmp_start_tool.initialize = (ompt_initialize_t)openmp_trace_initialize;
     439           0 :     openmp_start_tool.finalize = (ompt_finalize_t)openmp_trace_finalize;
     440           0 :     openmp_start_tool.tool_data.ptr = NULL;
     441           0 :     result = &openmp_start_tool;
     442             : #if defined(NDEBUG)
     443             :     if (1 == openmp_trace_level) {
     444             :       openmp_trace_level = 2; /* adjust trace level */
     445             :     }
     446             : #endif
     447             :   }
     448           0 :   return result;
     449             : }
     450             : 
     451             : #endif

Generated by: LCOV version 1.15