Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <math.h>
9 : #include <omp.h>
10 : #include <stdio.h>
11 : #include <stdlib.h>
12 : #include <string.h>
13 :
14 : #include "../common/grid_library.h"
15 : #include "grid_dgemm_collocate.h"
16 : #include "grid_dgemm_collocation_integration.h"
17 : #include "grid_dgemm_context.h"
18 : #include "grid_dgemm_private_header.h"
19 : #include "grid_dgemm_task_list.h"
20 : #include "grid_dgemm_tensor_local.h"
21 : #include "grid_dgemm_utils.h"
22 :
23 0 : void return_dh(void *const ptr, const int level, double *const dh) {
24 0 : grid_context *const ctx = (grid_context *)ptr;
25 :
26 0 : assert(ctx->checksum == ctx_checksum);
27 0 : dh[0] = ctx->grid[level].dh[0][0];
28 0 : dh[1] = ctx->grid[level].dh[0][1];
29 0 : dh[2] = ctx->grid[level].dh[0][2];
30 0 : dh[3] = ctx->grid[level].dh[1][0];
31 0 : dh[4] = ctx->grid[level].dh[1][1];
32 0 : dh[5] = ctx->grid[level].dh[1][2];
33 0 : dh[6] = ctx->grid[level].dh[2][0];
34 0 : dh[7] = ctx->grid[level].dh[2][1];
35 0 : dh[8] = ctx->grid[level].dh[2][2];
36 0 : }
37 :
38 0 : void return_dh_inv(void *const ptr, const int level, double *const dh_inv) {
39 0 : grid_context *const ctx = (grid_context *)ptr;
40 :
41 0 : assert(ctx->checksum == ctx_checksum);
42 0 : dh_inv[0] = ctx->grid[level].dh_inv[0][0];
43 0 : dh_inv[1] = ctx->grid[level].dh_inv[0][1];
44 0 : dh_inv[2] = ctx->grid[level].dh_inv[0][2];
45 0 : dh_inv[3] = ctx->grid[level].dh_inv[1][0];
46 0 : dh_inv[4] = ctx->grid[level].dh_inv[1][1];
47 0 : dh_inv[5] = ctx->grid[level].dh_inv[1][2];
48 0 : dh_inv[6] = ctx->grid[level].dh_inv[2][0];
49 0 : dh_inv[7] = ctx->grid[level].dh_inv[2][1];
50 0 : dh_inv[8] = ctx->grid[level].dh_inv[2][2];
51 0 : }
52 :
53 0 : int return_num_devs(void *const ptr) {
54 0 : grid_context *const ctx = (grid_context *)ptr;
55 0 : assert(ctx->checksum == ctx_checksum);
56 :
57 0 : return ctx->number_of_devices;
58 : }
59 :
60 0 : int return_device_id(void *const ptr, const int device) {
61 0 : grid_context *const ctx = (grid_context *)ptr;
62 0 : assert(ctx->checksum == ctx_checksum);
63 :
64 0 : return ctx->device_id[device];
65 : }
66 :
67 0 : int is_grid_orthorhombic(void *const ptr) {
68 0 : grid_context *const ctx = (grid_context *)ptr;
69 0 : assert(ctx->checksum == ctx_checksum);
70 0 : return ctx->orthorhombic;
71 : }
72 :
73 0 : void update_queue_length(void *const ptr, const int queue_length) {
74 0 : grid_context *const ctx = (grid_context *)ptr;
75 0 : assert(ctx->checksum == ctx_checksum);
76 0 : ctx->queue_length = queue_length;
77 0 : }
78 :
79 20 : void update_atoms_position(const int natoms,
80 : const double atoms_positions[natoms][3],
81 : grid_context *data) {
82 20 : assert(data != NULL);
83 :
84 20 : if (natoms == 0)
85 : return;
86 :
87 20 : if (data->atom_positions == NULL) {
88 8 : data->atom_positions = malloc(3 * natoms * sizeof(double));
89 : } else {
90 12 : if (natoms > data->natoms) {
91 0 : data->atom_positions =
92 0 : realloc(data->atom_positions, 3 * natoms * sizeof(double));
93 : }
94 : }
95 20 : assert(data->atom_positions != NULL);
96 :
97 20 : data->natoms = natoms;
98 :
99 20 : if (data->atom_positions) {
100 78 : for (int i = 0; i < natoms; i++) {
101 58 : data->atom_positions[3 * i] = atoms_positions[i][0];
102 58 : data->atom_positions[3 * i + 1] = atoms_positions[i][1];
103 58 : data->atom_positions[3 * i + 2] = atoms_positions[i][2];
104 : }
105 : }
106 : }
107 :
108 20 : void update_atoms_kinds(const int natoms, const int *atoms_kinds,
109 : grid_context *data) {
110 20 : assert(data != NULL);
111 :
112 : // data->atom_kinds is a table that give the type of a given atom.
113 20 : if (natoms == 0)
114 : return;
115 :
116 20 : if (data->atom_kinds == NULL) {
117 8 : data->atom_kinds = malloc(natoms * sizeof(int));
118 : } else {
119 12 : if ((natoms > data->natoms) && (data->natoms > 0)) {
120 0 : data->atom_kinds = realloc(data->atom_kinds, natoms * sizeof(int));
121 : }
122 : }
123 20 : assert(data->atom_kinds != NULL);
124 : // data->natoms is initialized before calling this function
125 20 : if (data->natoms)
126 20 : memcpy(data->atom_kinds, atoms_kinds, sizeof(int) * natoms);
127 :
128 78 : for (int i = 0; i < natoms; i++) {
129 58 : data->atom_kinds[i] -= 1;
130 : }
131 : }
132 :
133 20 : void update_block_offsets(const int nblocks, const int *const block_offsets,
134 : grid_context *data) {
135 20 : assert(data != NULL);
136 :
137 20 : if (nblocks == 0)
138 : return;
139 :
140 19 : if (data->block_offsets == NULL) {
141 7 : data->block_offsets = malloc(nblocks * sizeof(int));
142 : } else {
143 12 : if ((nblocks > data->nblocks_total) && (data->nblocks_total > 0)) {
144 0 : data->block_offsets = realloc(data->block_offsets, sizeof(int) * nblocks);
145 : }
146 : }
147 19 : assert(data->block_offsets != NULL);
148 :
149 19 : data->nblocks = nblocks;
150 19 : data->nblocks_total = imax(data->nblocks_total, nblocks);
151 19 : if (nblocks)
152 19 : memcpy(data->block_offsets, block_offsets, nblocks * sizeof(int));
153 : }
154 :
155 20 : void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets,
156 : grid_context *data) {
157 20 : if (nkinds > data->nkinds_total) {
158 8 : if (data->basis_sets == NULL) {
159 8 : data->basis_sets = malloc(nkinds * sizeof(grid_basis_set *));
160 : } else {
161 0 : data->basis_sets =
162 0 : realloc(data->basis_sets, nkinds * sizeof(grid_basis_set *));
163 : }
164 : }
165 20 : assert(data->basis_sets != NULL);
166 20 : data->nkinds = nkinds;
167 20 : data->nkinds_total = imax(data->nkinds_total, nkinds);
168 20 : memcpy(data->basis_sets, basis_sets, nkinds * sizeof(grid_basis_set *));
169 20 : }
170 :
171 20 : void update_task_lists(const int nlevels, const int ntasks,
172 : const int *const level_list, const int *const iatom_list,
173 : const int *const jatom_list, const int *const iset_list,
174 : const int *const jset_list, const int *const ipgf_list,
175 : const int *const jpgf_list,
176 : const int *const border_mask_list,
177 : const int *block_num_list,
178 : const double *const radius_list,
179 : const double rab_list[ntasks][3], grid_context *ctx) {
180 :
181 20 : assert(ctx->checksum == ctx_checksum);
182 :
183 20 : if (nlevels == 0)
184 : return;
185 :
186 20 : if (ctx->ntasks == 0) {
187 : // Count tasks per level.
188 8 : size_t size = nlevels * sizeof(int);
189 8 : ctx->tasks_per_level = malloc(size);
190 8 : ctx->tasks = malloc(nlevels * sizeof(_task *));
191 : /* memset(ctx->tasks, 0, nlevels * sizeof(_task *)); */
192 8 : if (ntasks)
193 7 : ctx->tasks[0] = malloc(ntasks * sizeof(_task));
194 : else
195 1 : ctx->tasks[0] = NULL;
196 : } else {
197 12 : if (ctx->nlevels_total < nlevels) {
198 : /* save the address of the full task list. NULL when completly empty */
199 0 : ctx->tasks = realloc(ctx->tasks, nlevels * sizeof(_task *));
200 0 : assert(ctx->tasks != NULL);
201 : }
202 12 : if (ctx->ntasks_total < ntasks) {
203 0 : ctx->tasks[0] = realloc(ctx->tasks[0], ntasks * sizeof(_task));
204 0 : assert(ctx->tasks[0] != NULL);
205 : }
206 : }
207 :
208 20 : memset(ctx->tasks_per_level, 0, nlevels * sizeof(int));
209 20 : ctx->nlevels = nlevels;
210 20 : ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
211 20 : ctx->ntasks_total = imax(ctx->ntasks_total, ntasks);
212 20 : ctx->ntasks = ntasks;
213 :
214 5793 : for (int i = 0; i < ntasks; i++) {
215 5773 : ctx->tasks_per_level[level_list[i] - 1]++;
216 5773 : assert(i == 0 || level_list[i] >= level_list[i - 1]); // expect ordered list
217 : }
218 :
219 80 : for (int i = 1; i < ctx->nlevels; i++) {
220 60 : ctx->tasks[i] = ctx->tasks[i - 1] + ctx->tasks_per_level[i - 1];
221 : }
222 :
223 20 : int prev_block_num = -1;
224 20 : int prev_iset = -1;
225 20 : int prev_jset = -1;
226 20 : int prev_level = -1;
227 20 : _task *task = ctx->tasks[0];
228 5793 : for (int i = 0; i < ntasks; i++) {
229 5773 : if (prev_level != (level_list[i] - 1)) {
230 57 : prev_level = level_list[i] - 1;
231 57 : prev_block_num = -1;
232 57 : prev_iset = -1;
233 57 : prev_jset = -1;
234 : }
235 5773 : task->level = level_list[i] - 1;
236 5773 : task->iatom = iatom_list[i] - 1;
237 5773 : task->jatom = jatom_list[i] - 1;
238 5773 : task->iset = iset_list[i] - 1;
239 5773 : task->jset = jset_list[i] - 1;
240 5773 : task->ipgf = ipgf_list[i] - 1;
241 5773 : task->jpgf = jpgf_list[i] - 1;
242 5773 : task->border_mask = border_mask_list[i];
243 5773 : task->block_num = block_num_list[i] - 1;
244 5773 : task->radius = radius_list[i];
245 5773 : task->rab[0] = rab_list[i][0];
246 5773 : task->rab[1] = rab_list[i][1];
247 5773 : task->rab[2] = rab_list[i][2];
248 5773 : const int iatom = task->iatom;
249 5773 : const int jatom = task->jatom;
250 5773 : const int iset = task->iset;
251 5773 : const int jset = task->jset;
252 5773 : const int ipgf = task->ipgf;
253 5773 : const int jpgf = task->jpgf;
254 5773 : const int ikind = ctx->atom_kinds[iatom];
255 5773 : const int jkind = ctx->atom_kinds[jatom];
256 5773 : const grid_basis_set *ibasis = ctx->basis_sets[ikind];
257 5773 : const grid_basis_set *jbasis = ctx->basis_sets[jkind];
258 5773 : const int ncoseta = ncoset(ibasis->lmax[iset]);
259 5773 : const int ncosetb = ncoset(jbasis->lmax[jset]);
260 :
261 5773 : task->zeta[0] = ibasis->zet[iset * ibasis->maxpgf + ipgf];
262 5773 : task->zeta[1] = jbasis->zet[jset * jbasis->maxpgf + jpgf];
263 :
264 5773 : const double *ra = &ctx->atom_positions[3 * iatom];
265 5773 : const double zetp = task->zeta[0] + task->zeta[1];
266 5773 : const double f = task->zeta[1] / zetp;
267 5773 : const double rab2 = task->rab[0] * task->rab[0] +
268 5773 : task->rab[1] * task->rab[1] +
269 5773 : task->rab[2] * task->rab[2];
270 :
271 5773 : task->prefactor = exp(-task->zeta[0] * f * rab2);
272 5773 : task->zetp = zetp;
273 :
274 5773 : const int block_num = task->block_num;
275 :
276 23092 : for (int i = 0; i < 3; i++) {
277 17319 : task->ra[i] = ra[i];
278 17319 : task->rp[i] = ra[i] + f * task->rab[i];
279 17319 : task->rb[i] = ra[i] + task->rab[i];
280 : }
281 :
282 5773 : task->lmax[0] = ibasis->lmax[iset];
283 5773 : task->lmax[1] = jbasis->lmax[jset];
284 5773 : task->lmin[0] = ibasis->lmin[iset];
285 5773 : task->lmin[1] = jbasis->lmin[jset];
286 :
287 5773 : if ((block_num != prev_block_num) || (iset != prev_iset) ||
288 : (jset != prev_jset)) {
289 558 : task->update_block_ = true;
290 558 : prev_block_num = block_num;
291 558 : prev_iset = iset;
292 558 : prev_jset = jset;
293 : } else {
294 5215 : task->update_block_ = false;
295 : }
296 :
297 5773 : task->offset[0] = ipgf * ncoseta;
298 5773 : task->offset[1] = jpgf * ncosetb;
299 5773 : task++;
300 : }
301 :
302 : // Find largest Cartesian subblock size.
303 20 : ctx->maxco = 0;
304 56 : for (int i = 0; i < ctx->nkinds; i++) {
305 36 : ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
306 : }
307 : }
308 :
309 20 : void update_layouts(const int nlevels, const int npts_global[nlevels][3],
310 : const int npts_local[nlevels][3],
311 : const int shift_local[nlevels][3],
312 : const int border_width[nlevels][3],
313 : const double dh[nlevels][3][3],
314 : const double dh_inv[nlevels][3][3], grid_context *ctx) {
315 :
316 20 : assert(ctx != NULL);
317 20 : assert(ctx->checksum == ctx_checksum);
318 :
319 20 : if (ctx->layouts != NULL) {
320 12 : free(ctx->layouts);
321 : }
322 :
323 20 : ctx->layouts = malloc(sizeof(_layout) * nlevels);
324 :
325 100 : for (int level = 0; level < nlevels; level++) {
326 320 : for (int i = 0; i < 3; i++) {
327 240 : ctx->layouts[level].npts_global[i] = npts_global[level][i];
328 240 : ctx->layouts[level].npts_local[i] = npts_local[level][i];
329 240 : ctx->layouts[level].shift_local[i] = shift_local[level][i];
330 240 : ctx->layouts[level].border_width[i] = border_width[level][i];
331 960 : for (int j = 0; j < 3; j++) {
332 720 : ctx->layouts[level].dh[i][j] = dh[level][i][j];
333 720 : ctx->layouts[level].dh_inv[i][j] = dh_inv[level][i][j];
334 : }
335 : }
336 : }
337 20 : }
338 :
339 20 : void update_grid(const int nlevels, grid_context *ctx) {
340 20 : assert(ctx != NULL);
341 20 : assert(ctx->checksum == ctx_checksum);
342 :
343 20 : if (nlevels == 0)
344 : return;
345 :
346 20 : if (ctx->grid == NULL) {
347 8 : ctx->grid = malloc(sizeof(tensor) * nlevels);
348 : } else {
349 12 : if (ctx->nlevels_total < nlevels) {
350 0 : ctx->grid = realloc(ctx->grid, sizeof(tensor) * nlevels);
351 : }
352 : }
353 20 : assert(ctx->grid != NULL);
354 :
355 20 : ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
356 20 : ctx->nlevels = nlevels;
357 : }
358 :
359 8 : void *create_grid_context_dgemm(
360 : const bool orthorhombic, const int ntasks, const int nlevels,
361 : const int natoms, const int nkinds, const int nblocks,
362 : const int *block_offsets, const double atom_positions[natoms][3],
363 : const int *const atom_kinds, const grid_basis_set **const basis_sets,
364 : const int *const level_list, const int *const iatom_list,
365 : const int *jatom_list, const int *const iset_list,
366 : const int *const jset_list, const int *const ipgf_list,
367 : const int *const jpgf_list, const int *const border_mask_list,
368 : const int *block_num_list, const double *const radius_list,
369 : const double rab_list[ntasks][3], const int npts_global[nlevels][3],
370 : const int npts_local[nlevels][3], const int shift_local[nlevels][3],
371 : const int border_width[nlevels][3], const double dh[nlevels][3][3],
372 : const double dh_inv[nlevels][3][3]) {
373 :
374 8 : grid_context *ctx = malloc(sizeof(grid_context));
375 :
376 8 : memset(ctx, 0, sizeof(grid_context));
377 :
378 8 : ctx->checksum = ctx_checksum;
379 8 : ctx->orthorhombic = orthorhombic;
380 8 : update_block_offsets(nblocks, block_offsets, ctx);
381 8 : update_atoms_position(natoms, atom_positions, ctx);
382 8 : update_atoms_kinds(natoms, atom_kinds, ctx);
383 8 : update_basis_set(nkinds, basis_sets, ctx);
384 8 : update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
385 : iset_list, jset_list, ipgf_list, jpgf_list,
386 : border_mask_list, block_num_list, radius_list, rab_list,
387 : ctx);
388 8 : update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
389 : dh, dh_inv, ctx);
390 8 : update_grid(nlevels, ctx);
391 :
392 8 : const int max_threads = omp_get_max_threads();
393 :
394 8 : ctx->handler =
395 8 : malloc(sizeof(struct collocation_integration_ *) * max_threads);
396 :
397 16 : for (int i = 0; i < max_threads; i++) {
398 8 : ctx->handler[i] = collocate_create_handle();
399 : }
400 :
401 8 : ctx->number_of_handler = max_threads;
402 :
403 8 : return ctx;
404 : }
405 :
406 12 : void update_grid_context_dgemm(
407 : const bool orthorhombic, const int ntasks, const int nlevels,
408 : const int natoms, const int nkinds, const int nblocks,
409 : const int *block_offsets, const double atom_positions[natoms][3],
410 : const int *const atom_kinds, const grid_basis_set **const basis_sets,
411 : const int *const level_list, const int *const iatom_list,
412 : const int *jatom_list, const int *const iset_list,
413 : const int *const jset_list, const int *const ipgf_list,
414 : const int *const jpgf_list, const int *const border_mask_list,
415 : const int *block_num_list, const double *const radius_list,
416 : const double rab_list[ntasks][3], const int npts_global[nlevels][3],
417 : const int npts_local[nlevels][3], const int shift_local[nlevels][3],
418 : const int border_width[nlevels][3], const double dh[nlevels][3][3],
419 : const double dh_inv[nlevels][3][3], void *ptr) {
420 :
421 12 : assert(ptr != NULL);
422 12 : grid_context *ctx = (grid_context *)ptr;
423 12 : assert(ctx->checksum == ctx_checksum);
424 :
425 12 : ctx->orthorhombic = orthorhombic;
426 12 : update_block_offsets(nblocks, block_offsets, ctx);
427 12 : update_atoms_position(natoms, atom_positions, ctx);
428 12 : update_atoms_kinds(natoms, atom_kinds, ctx);
429 12 : update_basis_set(nkinds, basis_sets, ctx);
430 12 : update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
431 : iset_list, jset_list, ipgf_list, jpgf_list,
432 : border_mask_list, block_num_list, radius_list, rab_list,
433 : ctx);
434 12 : update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
435 : dh, dh_inv, ctx);
436 12 : update_grid(nlevels, ctx);
437 :
438 : // Find largest Cartesian subblock size.
439 12 : ctx->maxco = 0;
440 36 : for (int i = 0; i < nkinds; i++) {
441 24 : ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
442 : }
443 12 : }
444 :
445 0 : void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices,
446 : const int *device_id) {
447 0 : assert(ptr != NULL);
448 0 : grid_context *ctx = (grid_context *)ptr;
449 0 : assert(ctx->checksum == ctx_checksum);
450 0 : ctx->work_on_gpu = false;
451 0 : if (number_of_devices <= 0) {
452 : return;
453 : }
454 :
455 0 : ctx->number_of_devices = number_of_devices;
456 0 : ctx->queue_length = 8192;
457 0 : if (ctx->device_id == NULL) {
458 0 : ctx->device_id = malloc(sizeof(int) * number_of_devices);
459 : } else {
460 0 : ctx->device_id = realloc(ctx->device_id, sizeof(int) * number_of_devices);
461 : }
462 0 : assert(ctx->device_id != NULL);
463 :
464 0 : memcpy(ctx->device_id, device_id, sizeof(int) * number_of_devices);
465 : }
466 :
467 8 : void destroy_grid_context_dgemm(void *ptr) {
468 8 : assert(ptr);
469 8 : grid_context *ctx = (grid_context *)ptr;
470 8 : assert(ctx->checksum == ctx_checksum);
471 8 : free(ctx->block_offsets);
472 8 : free(ctx->atom_positions);
473 8 : free(ctx->atom_kinds);
474 8 : free(ctx->basis_sets);
475 8 : free(ctx->tasks[0]);
476 8 : free(ctx->tasks);
477 8 : free(ctx->tasks_per_level);
478 8 : free(ctx->layouts);
479 8 : free(ctx->grid);
480 8 : if (ctx->device_id)
481 0 : free(ctx->device_id);
482 :
483 8 : if (ctx->handler) {
484 16 : for (int i = 0; i < ctx->number_of_handler; i++) {
485 8 : collocate_destroy_handle(ctx->handler[i]);
486 : }
487 8 : free(ctx->handler);
488 : }
489 :
490 8 : free(ctx);
491 8 : }
492 :
493 0 : void apply_cutoff(void *ptr) {
494 0 : assert(ptr);
495 0 : grid_context *ctx = (grid_context *)ptr;
496 0 : assert(ctx->checksum == ctx_checksum);
497 0 : ctx->apply_cutoff = true;
498 0 : }
499 :
500 1256 : void set_grid_parameters(
501 : tensor *grid, const bool orthorhombic,
502 : const int grid_full_size[3], /* size of the full grid */
503 : const int grid_local_size[3], /* size of the local grid block */
504 : const int shift_local[3], /* coordinates of the lower coordinates of the
505 : local grid window */
506 : const int border_width[3], /* width of the borders */
507 : const double
508 : dh[3][3], /* displacement vectors of the grid (cartesian) -> (ijk) */
509 : const double dh_inv[3][3], /* (ijk) -> (x,y,z) */
510 : offload_buffer *grid_) {
511 1256 : memset(grid, 0, sizeof(tensor));
512 1256 : initialize_tensor_3(grid, grid_local_size[2], grid_local_size[1],
513 : grid_local_size[0]);
514 :
515 1256 : grid->data = grid_->host_buffer;
516 1256 : grid->ld_ = grid_local_size[0];
517 :
518 1256 : setup_global_grid_size(grid, &grid_full_size[0]);
519 :
520 : /* the grid is divided over several ranks or not periodic */
521 1256 : if ((grid_local_size[0] != grid_full_size[0]) ||
522 1256 : (grid_local_size[1] != grid_full_size[1]) ||
523 1256 : (grid_local_size[2] != grid_full_size[2])) {
524 0 : setup_grid_window(grid, shift_local, border_width, 0);
525 : } else {
526 1256 : grid->window_shift[0] = 0;
527 1256 : grid->window_shift[1] = 0;
528 1256 : grid->window_shift[2] = 0;
529 :
530 1256 : grid->window_size[0] = grid->size[0];
531 1256 : grid->window_size[1] = grid->size[1];
532 1256 : grid->window_size[2] = grid->size[2];
533 : }
534 :
535 1256 : grid->dh[0][0] = dh[0][0];
536 1256 : grid->dh[0][1] = dh[0][1];
537 1256 : grid->dh[0][2] = dh[0][2];
538 1256 : grid->dh[1][0] = dh[1][0];
539 1256 : grid->dh[1][1] = dh[1][1];
540 1256 : grid->dh[1][2] = dh[1][2];
541 1256 : grid->dh[2][0] = dh[2][0];
542 1256 : grid->dh[2][1] = dh[2][1];
543 1256 : grid->dh[2][2] = dh[2][2];
544 :
545 1256 : grid->dh_inv[0][0] = dh_inv[0][0];
546 1256 : grid->dh_inv[0][1] = dh_inv[0][1];
547 1256 : grid->dh_inv[0][2] = dh_inv[0][2];
548 1256 : grid->dh_inv[1][0] = dh_inv[1][0];
549 1256 : grid->dh_inv[1][1] = dh_inv[1][1];
550 1256 : grid->dh_inv[1][2] = dh_inv[1][2];
551 1256 : grid->dh_inv[2][0] = dh_inv[2][0];
552 1256 : grid->dh_inv[2][1] = dh_inv[2][1];
553 1256 : grid->dh_inv[2][2] = dh_inv[2][2];
554 :
555 1256 : verify_orthogonality(dh, grid->orthogonal);
556 :
557 1256 : if (orthorhombic) {
558 672 : grid->orthogonal[0] = true;
559 672 : grid->orthogonal[1] = true;
560 672 : grid->orthogonal[2] = true;
561 : }
562 1256 : }
563 :
564 : /*******************************************************************************
565 : * \brief Allocates a task list for the dgemm backend.
566 : * See grid_task_list.h for details.
567 : ******************************************************************************/
568 20 : void grid_dgemm_create_task_list(
569 : const bool orthorhombic, const int ntasks, const int nlevels,
570 : const int natoms, const int nkinds, const int nblocks,
571 : const int block_offsets[nblocks], const double atom_positions[natoms][3],
572 : const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
573 : const int level_list[ntasks], const int iatom_list[ntasks],
574 : const int jatom_list[ntasks], const int iset_list[ntasks],
575 : const int jset_list[ntasks], const int ipgf_list[ntasks],
576 : const int jpgf_list[ntasks], const int border_mask_list[ntasks],
577 : const int block_num_list[ntasks], const double radius_list[ntasks],
578 : const double rab_list[ntasks][3], const int npts_global[nlevels][3],
579 : const int npts_local[nlevels][3], const int shift_local[nlevels][3],
580 : const int border_width[nlevels][3], const double dh[nlevels][3][3],
581 : const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list) {
582 :
583 20 : if (*task_list == NULL) {
584 8 : *task_list = create_grid_context_dgemm(
585 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
586 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
587 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
588 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
589 : npts_local, shift_local, border_width, dh, dh_inv);
590 : } else {
591 12 : update_grid_context_dgemm(
592 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
593 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
594 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
595 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
596 : npts_local, shift_local, border_width, dh, dh_inv, *task_list);
597 : }
598 :
599 20 : const grid_library_config config = grid_library_get_config();
600 20 : if (config.apply_cutoff) {
601 0 : apply_cutoff(*task_list);
602 : }
603 20 : }
604 :
605 : /*******************************************************************************
606 : * \brief Deallocates given task list, basis_sets have to be freed separately.
607 : ******************************************************************************/
608 8 : void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list) {
609 8 : destroy_grid_context_dgemm(task_list);
610 8 : }
|