LCOV - code coverage report
Current view: top level - src/grid/dgemm - grid_dgemm_context.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:262480d) Lines: 263 339 77.6 %
Date: 2024-11-22 07:00:40 Functions: 13 21 61.9 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <math.h>
       9             : #include <omp.h>
      10             : #include <stdio.h>
      11             : #include <stdlib.h>
      12             : #include <string.h>
      13             : 
      14             : #include "../common/grid_library.h"
      15             : #include "grid_dgemm_collocate.h"
      16             : #include "grid_dgemm_collocation_integration.h"
      17             : #include "grid_dgemm_context.h"
      18             : #include "grid_dgemm_private_header.h"
      19             : #include "grid_dgemm_task_list.h"
      20             : #include "grid_dgemm_tensor_local.h"
      21             : #include "grid_dgemm_utils.h"
      22             : 
      23           0 : void return_dh(void *const ptr, const int level, double *const dh) {
      24           0 :   grid_context *const ctx = (grid_context *)ptr;
      25             : 
      26           0 :   assert(ctx->checksum == ctx_checksum);
      27           0 :   dh[0] = ctx->grid[level].dh[0][0];
      28           0 :   dh[1] = ctx->grid[level].dh[0][1];
      29           0 :   dh[2] = ctx->grid[level].dh[0][2];
      30           0 :   dh[3] = ctx->grid[level].dh[1][0];
      31           0 :   dh[4] = ctx->grid[level].dh[1][1];
      32           0 :   dh[5] = ctx->grid[level].dh[1][2];
      33           0 :   dh[6] = ctx->grid[level].dh[2][0];
      34           0 :   dh[7] = ctx->grid[level].dh[2][1];
      35           0 :   dh[8] = ctx->grid[level].dh[2][2];
      36           0 : }
      37             : 
      38           0 : void return_dh_inv(void *const ptr, const int level, double *const dh_inv) {
      39           0 :   grid_context *const ctx = (grid_context *)ptr;
      40             : 
      41           0 :   assert(ctx->checksum == ctx_checksum);
      42           0 :   dh_inv[0] = ctx->grid[level].dh_inv[0][0];
      43           0 :   dh_inv[1] = ctx->grid[level].dh_inv[0][1];
      44           0 :   dh_inv[2] = ctx->grid[level].dh_inv[0][2];
      45           0 :   dh_inv[3] = ctx->grid[level].dh_inv[1][0];
      46           0 :   dh_inv[4] = ctx->grid[level].dh_inv[1][1];
      47           0 :   dh_inv[5] = ctx->grid[level].dh_inv[1][2];
      48           0 :   dh_inv[6] = ctx->grid[level].dh_inv[2][0];
      49           0 :   dh_inv[7] = ctx->grid[level].dh_inv[2][1];
      50           0 :   dh_inv[8] = ctx->grid[level].dh_inv[2][2];
      51           0 : }
      52             : 
      53           0 : int return_num_devs(void *const ptr) {
      54           0 :   grid_context *const ctx = (grid_context *)ptr;
      55           0 :   assert(ctx->checksum == ctx_checksum);
      56             : 
      57           0 :   return ctx->number_of_devices;
      58             : }
      59             : 
      60           0 : int return_device_id(void *const ptr, const int device) {
      61           0 :   grid_context *const ctx = (grid_context *)ptr;
      62           0 :   assert(ctx->checksum == ctx_checksum);
      63             : 
      64           0 :   return ctx->device_id[device];
      65             : }
      66             : 
      67           0 : int is_grid_orthorhombic(void *const ptr) {
      68           0 :   grid_context *const ctx = (grid_context *)ptr;
      69           0 :   assert(ctx->checksum == ctx_checksum);
      70           0 :   return ctx->orthorhombic;
      71             : }
      72             : 
      73           0 : void update_queue_length(void *const ptr, const int queue_length) {
      74           0 :   grid_context *const ctx = (grid_context *)ptr;
      75           0 :   assert(ctx->checksum == ctx_checksum);
      76           0 :   ctx->queue_length = queue_length;
      77           0 : }
      78             : 
      79          20 : void update_atoms_position(const int natoms,
      80             :                            const double atoms_positions[natoms][3],
      81             :                            grid_context *data) {
      82          20 :   assert(data != NULL);
      83             : 
      84          20 :   if (natoms == 0)
      85             :     return;
      86             : 
      87          20 :   if (data->atom_positions == NULL) {
      88           8 :     data->atom_positions = malloc(3 * natoms * sizeof(double));
      89             :   } else {
      90          12 :     if (natoms > data->natoms) {
      91           0 :       data->atom_positions =
      92           0 :           realloc(data->atom_positions, 3 * natoms * sizeof(double));
      93             :     }
      94             :   }
      95          20 :   assert(data->atom_positions != NULL);
      96             : 
      97          20 :   data->natoms = natoms;
      98             : 
      99          20 :   if (data->atom_positions) {
     100          78 :     for (int i = 0; i < natoms; i++) {
     101          58 :       data->atom_positions[3 * i] = atoms_positions[i][0];
     102          58 :       data->atom_positions[3 * i + 1] = atoms_positions[i][1];
     103          58 :       data->atom_positions[3 * i + 2] = atoms_positions[i][2];
     104             :     }
     105             :   }
     106             : }
     107             : 
     108          20 : void update_atoms_kinds(const int natoms, const int *atoms_kinds,
     109             :                         grid_context *data) {
     110          20 :   assert(data != NULL);
     111             : 
     112             :   // data->atom_kinds is a table that give the type of a given atom.
     113          20 :   if (natoms == 0)
     114             :     return;
     115             : 
     116          20 :   if (data->atom_kinds == NULL) {
     117           8 :     data->atom_kinds = malloc(natoms * sizeof(int));
     118             :   } else {
     119          12 :     if ((natoms > data->natoms) && (data->natoms > 0)) {
     120           0 :       data->atom_kinds = realloc(data->atom_kinds, natoms * sizeof(int));
     121             :     }
     122             :   }
     123          20 :   assert(data->atom_kinds != NULL);
     124             :   // data->natoms is initialized before calling this function
     125          20 :   if (data->natoms)
     126          20 :     memcpy(data->atom_kinds, atoms_kinds, sizeof(int) * natoms);
     127             : 
     128          78 :   for (int i = 0; i < natoms; i++) {
     129          58 :     data->atom_kinds[i] -= 1;
     130             :   }
     131             : }
     132             : 
     133          20 : void update_block_offsets(const int nblocks, const int *const block_offsets,
     134             :                           grid_context *data) {
     135          20 :   assert(data != NULL);
     136             : 
     137          20 :   if (nblocks == 0)
     138             :     return;
     139             : 
     140          19 :   if (data->block_offsets == NULL) {
     141           7 :     data->block_offsets = malloc(nblocks * sizeof(int));
     142             :   } else {
     143          12 :     if ((nblocks > data->nblocks_total) && (data->nblocks_total > 0)) {
     144           0 :       data->block_offsets = realloc(data->block_offsets, sizeof(int) * nblocks);
     145             :     }
     146             :   }
     147          19 :   assert(data->block_offsets != NULL);
     148             : 
     149          19 :   data->nblocks = nblocks;
     150          19 :   data->nblocks_total = imax(data->nblocks_total, nblocks);
     151          19 :   if (nblocks)
     152          19 :     memcpy(data->block_offsets, block_offsets, nblocks * sizeof(int));
     153             : }
     154             : 
     155          20 : void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets,
     156             :                       grid_context *data) {
     157          20 :   if (nkinds > data->nkinds_total) {
     158           8 :     if (data->basis_sets == NULL) {
     159           8 :       data->basis_sets = malloc(nkinds * sizeof(grid_basis_set *));
     160             :     } else {
     161           0 :       data->basis_sets =
     162           0 :           realloc(data->basis_sets, nkinds * sizeof(grid_basis_set *));
     163             :     }
     164             :   }
     165          20 :   assert(data->basis_sets != NULL);
     166          20 :   data->nkinds = nkinds;
     167          20 :   data->nkinds_total = imax(data->nkinds_total, nkinds);
     168          20 :   memcpy(data->basis_sets, basis_sets, nkinds * sizeof(grid_basis_set *));
     169          20 : }
     170             : 
     171          20 : void update_task_lists(const int nlevels, const int ntasks,
     172             :                        const int *const level_list, const int *const iatom_list,
     173             :                        const int *const jatom_list, const int *const iset_list,
     174             :                        const int *const jset_list, const int *const ipgf_list,
     175             :                        const int *const jpgf_list,
     176             :                        const int *const border_mask_list,
     177             :                        const int *block_num_list,
     178             :                        const double *const radius_list,
     179             :                        const double rab_list[ntasks][3], grid_context *ctx) {
     180             : 
     181          20 :   assert(ctx->checksum == ctx_checksum);
     182             : 
     183          20 :   if (nlevels == 0)
     184             :     return;
     185             : 
     186          20 :   if (ctx->ntasks == 0) {
     187             :     // Count tasks per level.
     188           8 :     size_t size = nlevels * sizeof(int);
     189           8 :     ctx->tasks_per_level = malloc(size);
     190           8 :     ctx->tasks = malloc(nlevels * sizeof(_task *));
     191             :     /* memset(ctx->tasks, 0, nlevels * sizeof(_task *)); */
     192           8 :     if (ntasks)
     193           7 :       ctx->tasks[0] = malloc(ntasks * sizeof(_task));
     194             :     else
     195           1 :       ctx->tasks[0] = NULL;
     196             :   } else {
     197          12 :     if (ctx->nlevels_total < nlevels) {
     198             :       /* save the address of the full task list. NULL when completly empty */
     199           0 :       ctx->tasks = realloc(ctx->tasks, nlevels * sizeof(_task *));
     200           0 :       assert(ctx->tasks != NULL);
     201             :     }
     202          12 :     if (ctx->ntasks_total < ntasks) {
     203           0 :       ctx->tasks[0] = realloc(ctx->tasks[0], ntasks * sizeof(_task));
     204           0 :       assert(ctx->tasks[0] != NULL);
     205             :     }
     206             :   }
     207             : 
     208          20 :   memset(ctx->tasks_per_level, 0, nlevels * sizeof(int));
     209          20 :   ctx->nlevels = nlevels;
     210          20 :   ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
     211          20 :   ctx->ntasks_total = imax(ctx->ntasks_total, ntasks);
     212          20 :   ctx->ntasks = ntasks;
     213             : 
     214        5793 :   for (int i = 0; i < ntasks; i++) {
     215        5773 :     ctx->tasks_per_level[level_list[i] - 1]++;
     216        5773 :     assert(i == 0 || level_list[i] >= level_list[i - 1]); // expect ordered list
     217             :   }
     218             : 
     219          80 :   for (int i = 1; i < ctx->nlevels; i++) {
     220          60 :     ctx->tasks[i] = ctx->tasks[i - 1] + ctx->tasks_per_level[i - 1];
     221             :   }
     222             : 
     223          20 :   int prev_block_num = -1;
     224          20 :   int prev_iset = -1;
     225          20 :   int prev_jset = -1;
     226          20 :   int prev_level = -1;
     227          20 :   _task *task = ctx->tasks[0];
     228        5793 :   for (int i = 0; i < ntasks; i++) {
     229        5773 :     if (prev_level != (level_list[i] - 1)) {
     230          57 :       prev_level = level_list[i] - 1;
     231          57 :       prev_block_num = -1;
     232          57 :       prev_iset = -1;
     233          57 :       prev_jset = -1;
     234             :     }
     235        5773 :     task->level = level_list[i] - 1;
     236        5773 :     task->iatom = iatom_list[i] - 1;
     237        5773 :     task->jatom = jatom_list[i] - 1;
     238        5773 :     task->iset = iset_list[i] - 1;
     239        5773 :     task->jset = jset_list[i] - 1;
     240        5773 :     task->ipgf = ipgf_list[i] - 1;
     241        5773 :     task->jpgf = jpgf_list[i] - 1;
     242        5773 :     task->border_mask = border_mask_list[i];
     243        5773 :     task->block_num = block_num_list[i] - 1;
     244        5773 :     task->radius = radius_list[i];
     245        5773 :     task->rab[0] = rab_list[i][0];
     246        5773 :     task->rab[1] = rab_list[i][1];
     247        5773 :     task->rab[2] = rab_list[i][2];
     248        5773 :     const int iatom = task->iatom;
     249        5773 :     const int jatom = task->jatom;
     250        5773 :     const int iset = task->iset;
     251        5773 :     const int jset = task->jset;
     252        5773 :     const int ipgf = task->ipgf;
     253        5773 :     const int jpgf = task->jpgf;
     254        5773 :     const int ikind = ctx->atom_kinds[iatom];
     255        5773 :     const int jkind = ctx->atom_kinds[jatom];
     256        5773 :     const grid_basis_set *ibasis = ctx->basis_sets[ikind];
     257        5773 :     const grid_basis_set *jbasis = ctx->basis_sets[jkind];
     258        5773 :     const int ncoseta = ncoset(ibasis->lmax[iset]);
     259        5773 :     const int ncosetb = ncoset(jbasis->lmax[jset]);
     260             : 
     261        5773 :     task->zeta[0] = ibasis->zet[iset * ibasis->maxpgf + ipgf];
     262        5773 :     task->zeta[1] = jbasis->zet[jset * jbasis->maxpgf + jpgf];
     263             : 
     264        5773 :     const double *ra = &ctx->atom_positions[3 * iatom];
     265        5773 :     const double zetp = task->zeta[0] + task->zeta[1];
     266        5773 :     const double f = task->zeta[1] / zetp;
     267        5773 :     const double rab2 = task->rab[0] * task->rab[0] +
     268        5773 :                         task->rab[1] * task->rab[1] +
     269        5773 :                         task->rab[2] * task->rab[2];
     270             : 
     271        5773 :     task->prefactor = exp(-task->zeta[0] * f * rab2);
     272        5773 :     task->zetp = zetp;
     273             : 
     274        5773 :     const int block_num = task->block_num;
     275             : 
     276       23092 :     for (int i = 0; i < 3; i++) {
     277       17319 :       task->ra[i] = ra[i];
     278       17319 :       task->rp[i] = ra[i] + f * task->rab[i];
     279       17319 :       task->rb[i] = ra[i] + task->rab[i];
     280             :     }
     281             : 
     282        5773 :     task->lmax[0] = ibasis->lmax[iset];
     283        5773 :     task->lmax[1] = jbasis->lmax[jset];
     284        5773 :     task->lmin[0] = ibasis->lmin[iset];
     285        5773 :     task->lmin[1] = jbasis->lmin[jset];
     286             : 
     287        5773 :     if ((block_num != prev_block_num) || (iset != prev_iset) ||
     288             :         (jset != prev_jset)) {
     289         558 :       task->update_block_ = true;
     290         558 :       prev_block_num = block_num;
     291         558 :       prev_iset = iset;
     292         558 :       prev_jset = jset;
     293             :     } else {
     294        5215 :       task->update_block_ = false;
     295             :     }
     296             : 
     297        5773 :     task->offset[0] = ipgf * ncoseta;
     298        5773 :     task->offset[1] = jpgf * ncosetb;
     299        5773 :     task++;
     300             :   }
     301             : 
     302             :   // Find largest Cartesian subblock size.
     303          20 :   ctx->maxco = 0;
     304          56 :   for (int i = 0; i < ctx->nkinds; i++) {
     305          36 :     ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
     306             :   }
     307             : }
     308             : 
     309          20 : void update_layouts(const int nlevels, const int npts_global[nlevels][3],
     310             :                     const int npts_local[nlevels][3],
     311             :                     const int shift_local[nlevels][3],
     312             :                     const int border_width[nlevels][3],
     313             :                     const double dh[nlevels][3][3],
     314             :                     const double dh_inv[nlevels][3][3], grid_context *ctx) {
     315             : 
     316          20 :   assert(ctx != NULL);
     317          20 :   assert(ctx->checksum == ctx_checksum);
     318             : 
     319          20 :   if (ctx->layouts != NULL) {
     320          12 :     free(ctx->layouts);
     321             :   }
     322             : 
     323          20 :   ctx->layouts = malloc(sizeof(_layout) * nlevels);
     324             : 
     325         100 :   for (int level = 0; level < nlevels; level++) {
     326         320 :     for (int i = 0; i < 3; i++) {
     327         240 :       ctx->layouts[level].npts_global[i] = npts_global[level][i];
     328         240 :       ctx->layouts[level].npts_local[i] = npts_local[level][i];
     329         240 :       ctx->layouts[level].shift_local[i] = shift_local[level][i];
     330         240 :       ctx->layouts[level].border_width[i] = border_width[level][i];
     331         960 :       for (int j = 0; j < 3; j++) {
     332         720 :         ctx->layouts[level].dh[i][j] = dh[level][i][j];
     333         720 :         ctx->layouts[level].dh_inv[i][j] = dh_inv[level][i][j];
     334             :       }
     335             :     }
     336             :   }
     337          20 : }
     338             : 
     339          20 : void update_grid(const int nlevels, grid_context *ctx) {
     340          20 :   assert(ctx != NULL);
     341          20 :   assert(ctx->checksum == ctx_checksum);
     342             : 
     343          20 :   if (nlevels == 0)
     344             :     return;
     345             : 
     346          20 :   if (ctx->grid == NULL) {
     347           8 :     ctx->grid = malloc(sizeof(tensor) * nlevels);
     348             :   } else {
     349          12 :     if (ctx->nlevels_total < nlevels) {
     350           0 :       ctx->grid = realloc(ctx->grid, sizeof(tensor) * nlevels);
     351             :     }
     352             :   }
     353          20 :   assert(ctx->grid != NULL);
     354             : 
     355          20 :   ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
     356          20 :   ctx->nlevels = nlevels;
     357             : }
     358             : 
     359           8 : void *create_grid_context_dgemm(
     360             :     const bool orthorhombic, const int ntasks, const int nlevels,
     361             :     const int natoms, const int nkinds, const int nblocks,
     362             :     const int *block_offsets, const double atom_positions[natoms][3],
     363             :     const int *const atom_kinds, const grid_basis_set **const basis_sets,
     364             :     const int *const level_list, const int *const iatom_list,
     365             :     const int *jatom_list, const int *const iset_list,
     366             :     const int *const jset_list, const int *const ipgf_list,
     367             :     const int *const jpgf_list, const int *const border_mask_list,
     368             :     const int *block_num_list, const double *const radius_list,
     369             :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     370             :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     371             :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     372             :     const double dh_inv[nlevels][3][3]) {
     373             : 
     374           8 :   grid_context *ctx = malloc(sizeof(grid_context));
     375             : 
     376           8 :   memset(ctx, 0, sizeof(grid_context));
     377             : 
     378           8 :   ctx->checksum = ctx_checksum;
     379           8 :   ctx->orthorhombic = orthorhombic;
     380           8 :   update_block_offsets(nblocks, block_offsets, ctx);
     381           8 :   update_atoms_position(natoms, atom_positions, ctx);
     382           8 :   update_atoms_kinds(natoms, atom_kinds, ctx);
     383           8 :   update_basis_set(nkinds, basis_sets, ctx);
     384           8 :   update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
     385             :                     iset_list, jset_list, ipgf_list, jpgf_list,
     386             :                     border_mask_list, block_num_list, radius_list, rab_list,
     387             :                     ctx);
     388           8 :   update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
     389             :                  dh, dh_inv, ctx);
     390           8 :   update_grid(nlevels, ctx);
     391             : 
     392           8 :   const int max_threads = omp_get_max_threads();
     393             : 
     394           8 :   ctx->handler =
     395           8 :       malloc(sizeof(struct collocation_integration_ *) * max_threads);
     396             : 
     397          16 :   for (int i = 0; i < max_threads; i++) {
     398           8 :     ctx->handler[i] = collocate_create_handle();
     399             :   }
     400             : 
     401           8 :   ctx->number_of_handler = max_threads;
     402             : 
     403           8 :   return ctx;
     404             : }
     405             : 
     406          12 : void update_grid_context_dgemm(
     407             :     const bool orthorhombic, const int ntasks, const int nlevels,
     408             :     const int natoms, const int nkinds, const int nblocks,
     409             :     const int *block_offsets, const double atom_positions[natoms][3],
     410             :     const int *const atom_kinds, const grid_basis_set **const basis_sets,
     411             :     const int *const level_list, const int *const iatom_list,
     412             :     const int *jatom_list, const int *const iset_list,
     413             :     const int *const jset_list, const int *const ipgf_list,
     414             :     const int *const jpgf_list, const int *const border_mask_list,
     415             :     const int *block_num_list, const double *const radius_list,
     416             :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     417             :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     418             :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     419             :     const double dh_inv[nlevels][3][3], void *ptr) {
     420             : 
     421          12 :   assert(ptr != NULL);
     422          12 :   grid_context *ctx = (grid_context *)ptr;
     423          12 :   assert(ctx->checksum == ctx_checksum);
     424             : 
     425          12 :   ctx->orthorhombic = orthorhombic;
     426          12 :   update_block_offsets(nblocks, block_offsets, ctx);
     427          12 :   update_atoms_position(natoms, atom_positions, ctx);
     428          12 :   update_atoms_kinds(natoms, atom_kinds, ctx);
     429          12 :   update_basis_set(nkinds, basis_sets, ctx);
     430          12 :   update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
     431             :                     iset_list, jset_list, ipgf_list, jpgf_list,
     432             :                     border_mask_list, block_num_list, radius_list, rab_list,
     433             :                     ctx);
     434          12 :   update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
     435             :                  dh, dh_inv, ctx);
     436          12 :   update_grid(nlevels, ctx);
     437             : 
     438             :   // Find largest Cartesian subblock size.
     439          12 :   ctx->maxco = 0;
     440          36 :   for (int i = 0; i < nkinds; i++) {
     441          24 :     ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
     442             :   }
     443          12 : }
     444             : 
     445           0 : void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices,
     446             :                                     const int *device_id) {
     447           0 :   assert(ptr != NULL);
     448           0 :   grid_context *ctx = (grid_context *)ptr;
     449           0 :   assert(ctx->checksum == ctx_checksum);
     450           0 :   ctx->work_on_gpu = false;
     451           0 :   if (number_of_devices <= 0) {
     452             :     return;
     453             :   }
     454             : 
     455           0 :   ctx->number_of_devices = number_of_devices;
     456           0 :   ctx->queue_length = 8192;
     457           0 :   if (ctx->device_id == NULL) {
     458           0 :     ctx->device_id = malloc(sizeof(int) * number_of_devices);
     459             :   } else {
     460           0 :     ctx->device_id = realloc(ctx->device_id, sizeof(int) * number_of_devices);
     461             :   }
     462           0 :   assert(ctx->device_id != NULL);
     463             : 
     464           0 :   memcpy(ctx->device_id, device_id, sizeof(int) * number_of_devices);
     465             : }
     466             : 
     467           8 : void destroy_grid_context_dgemm(void *ptr) {
     468           8 :   assert(ptr);
     469           8 :   grid_context *ctx = (grid_context *)ptr;
     470           8 :   assert(ctx->checksum == ctx_checksum);
     471           8 :   free(ctx->block_offsets);
     472           8 :   free(ctx->atom_positions);
     473           8 :   free(ctx->atom_kinds);
     474           8 :   free(ctx->basis_sets);
     475           8 :   free(ctx->tasks[0]);
     476           8 :   free(ctx->tasks);
     477           8 :   free(ctx->tasks_per_level);
     478           8 :   free(ctx->layouts);
     479           8 :   free(ctx->grid);
     480           8 :   if (ctx->device_id)
     481           0 :     free(ctx->device_id);
     482             : 
     483           8 :   if (ctx->handler) {
     484          16 :     for (int i = 0; i < ctx->number_of_handler; i++) {
     485           8 :       collocate_destroy_handle(ctx->handler[i]);
     486             :     }
     487           8 :     free(ctx->handler);
     488             :   }
     489             : 
     490           8 :   free(ctx);
     491           8 : }
     492             : 
     493           0 : void apply_cutoff(void *ptr) {
     494           0 :   assert(ptr);
     495           0 :   grid_context *ctx = (grid_context *)ptr;
     496           0 :   assert(ctx->checksum == ctx_checksum);
     497           0 :   ctx->apply_cutoff = true;
     498           0 : }
     499             : 
     500        1256 : void set_grid_parameters(
     501             :     tensor *grid, const bool orthorhombic,
     502             :     const int grid_full_size[3],  /* size of the full grid */
     503             :     const int grid_local_size[3], /* size of the local grid block */
     504             :     const int shift_local[3],     /* coordinates of the lower coordinates of the
     505             :                                      local grid window */
     506             :     const int border_width[3],    /* width of the borders */
     507             :     const double
     508             :         dh[3][3], /* displacement vectors of the grid (cartesian) -> (ijk) */
     509             :     const double dh_inv[3][3], /* (ijk) -> (x,y,z) */
     510             :     offload_buffer *grid_) {
     511        1256 :   memset(grid, 0, sizeof(tensor));
     512        1256 :   initialize_tensor_3(grid, grid_local_size[2], grid_local_size[1],
     513             :                       grid_local_size[0]);
     514             : 
     515        1256 :   grid->data = grid_->host_buffer;
     516        1256 :   grid->ld_ = grid_local_size[0];
     517             : 
     518        1256 :   setup_global_grid_size(grid, &grid_full_size[0]);
     519             : 
     520             :   /* the grid is divided over several ranks or not periodic */
     521        1256 :   if ((grid_local_size[0] != grid_full_size[0]) ||
     522        1256 :       (grid_local_size[1] != grid_full_size[1]) ||
     523        1256 :       (grid_local_size[2] != grid_full_size[2])) {
     524           0 :     setup_grid_window(grid, shift_local, border_width, 0);
     525             :   } else {
     526        1256 :     grid->window_shift[0] = 0;
     527        1256 :     grid->window_shift[1] = 0;
     528        1256 :     grid->window_shift[2] = 0;
     529             : 
     530        1256 :     grid->window_size[0] = grid->size[0];
     531        1256 :     grid->window_size[1] = grid->size[1];
     532        1256 :     grid->window_size[2] = grid->size[2];
     533             :   }
     534             : 
     535        1256 :   grid->dh[0][0] = dh[0][0];
     536        1256 :   grid->dh[0][1] = dh[0][1];
     537        1256 :   grid->dh[0][2] = dh[0][2];
     538        1256 :   grid->dh[1][0] = dh[1][0];
     539        1256 :   grid->dh[1][1] = dh[1][1];
     540        1256 :   grid->dh[1][2] = dh[1][2];
     541        1256 :   grid->dh[2][0] = dh[2][0];
     542        1256 :   grid->dh[2][1] = dh[2][1];
     543        1256 :   grid->dh[2][2] = dh[2][2];
     544             : 
     545        1256 :   grid->dh_inv[0][0] = dh_inv[0][0];
     546        1256 :   grid->dh_inv[0][1] = dh_inv[0][1];
     547        1256 :   grid->dh_inv[0][2] = dh_inv[0][2];
     548        1256 :   grid->dh_inv[1][0] = dh_inv[1][0];
     549        1256 :   grid->dh_inv[1][1] = dh_inv[1][1];
     550        1256 :   grid->dh_inv[1][2] = dh_inv[1][2];
     551        1256 :   grid->dh_inv[2][0] = dh_inv[2][0];
     552        1256 :   grid->dh_inv[2][1] = dh_inv[2][1];
     553        1256 :   grid->dh_inv[2][2] = dh_inv[2][2];
     554             : 
     555        1256 :   verify_orthogonality(dh, grid->orthogonal);
     556             : 
     557        1256 :   if (orthorhombic) {
     558         672 :     grid->orthogonal[0] = true;
     559         672 :     grid->orthogonal[1] = true;
     560         672 :     grid->orthogonal[2] = true;
     561             :   }
     562        1256 : }
     563             : 
     564             : /*******************************************************************************
     565             :  * \brief Allocates a task list for the dgemm backend.
     566             :  *        See grid_task_list.h for details.
     567             :  ******************************************************************************/
     568          20 : void grid_dgemm_create_task_list(
     569             :     const bool orthorhombic, const int ntasks, const int nlevels,
     570             :     const int natoms, const int nkinds, const int nblocks,
     571             :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
     572             :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
     573             :     const int level_list[ntasks], const int iatom_list[ntasks],
     574             :     const int jatom_list[ntasks], const int iset_list[ntasks],
     575             :     const int jset_list[ntasks], const int ipgf_list[ntasks],
     576             :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
     577             :     const int block_num_list[ntasks], const double radius_list[ntasks],
     578             :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     579             :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     580             :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     581             :     const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list) {
     582             : 
     583          20 :   if (*task_list == NULL) {
     584           8 :     *task_list = create_grid_context_dgemm(
     585             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     586             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     587             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     588             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     589             :         npts_local, shift_local, border_width, dh, dh_inv);
     590             :   } else {
     591          12 :     update_grid_context_dgemm(
     592             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     593             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     594             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     595             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     596             :         npts_local, shift_local, border_width, dh, dh_inv, *task_list);
     597             :   }
     598             : 
     599          20 :   const grid_library_config config = grid_library_get_config();
     600          20 :   if (config.apply_cutoff) {
     601           0 :     apply_cutoff(*task_list);
     602             :   }
     603          20 : }
     604             : 
     605             : /*******************************************************************************
     606             :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     607             :  ******************************************************************************/
     608           8 : void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list) {
     609           8 :   destroy_grid_context_dgemm(task_list);
     610           8 : }

Generated by: LCOV version 1.15