Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #ifndef GRID_DGEMM_TENSOR_LOCAL_H
9 : #define GRID_DGEMM_TENSOR_LOCAL_H
10 :
11 : #include <stdbool.h>
12 : #include <stdio.h>
13 : #include <stdlib.h>
14 : #include <string.h>
15 : #include <unistd.h>
16 :
17 : typedef struct tensor_ {
18 : int dim_;
19 : int size[4];
20 : size_t alloc_size_;
21 : size_t old_alloc_size_;
22 : int offsets[4];
23 : double *data;
24 : int ld_;
25 : int window_shift[4]; /* lower corner of the window. Should be between lower
26 : * corner and upper corner of the local grid */
27 : int window_size[4]; /* size of the window where computations should be
28 : * done */
29 : int full_size[4]; /* size of the global grid */
30 : int lower_corner[4]; /* coordinates of the lower corner of the local part of
31 : * the grid. It can be different from the window where
32 : * computations should be done. The upper corner can be
33 : * deduced with the sum of the grid size and the lower
34 : * corner */
35 : /* only relevant when the tensor represents a grid */
36 : double dh[3][3];
37 : double dh_inv[3][3];
38 : bool orthogonal[3];
39 : } tensor;
40 :
41 : extern void tensor_copy(tensor *const b, const tensor *const a);
42 :
43 : /* initialize a tensor structure for a tensor of dimension dim <= 4 */
44 :
45 1080955 : static inline void initialize_tensor(struct tensor_ *a, const int dim,
46 : const int *const sizes) {
47 1080955 : if (a == NULL)
48 : return;
49 :
50 1080955 : a->dim_ = dim;
51 3957837 : for (int d = 0; d < dim; d++)
52 2876882 : a->size[d] = sizes[d];
53 :
54 : // we need proper alignment here. But can be done later
55 : /* a->ld_ = (sizes[a->dim_ - 1] / 32 + 1) * 32; */
56 1080955 : a->ld_ = sizes[a->dim_ - 1];
57 1080955 : switch (a->dim_) {
58 88000 : case 4: {
59 88000 : a->offsets[0] = a->ld_ * a->size[1] * a->size[2];
60 88000 : a->offsets[1] = a->ld_ * a->size[2];
61 88000 : a->offsets[2] = a->ld_;
62 88000 : break;
63 : }
64 538972 : case 3: {
65 538972 : a->offsets[0] = a->ld_ * a->size[1];
66 538972 : a->offsets[1] = a->ld_;
67 538972 : } break;
68 453983 : case 2: { // matrix case
69 453983 : a->offsets[0] = a->ld_;
70 453983 : } break;
71 : case 1:
72 : break;
73 : }
74 :
75 1080955 : a->alloc_size_ = a->offsets[0] * a->size[0];
76 1080955 : return;
77 : }
78 :
79 : /* initialize a tensor structure for a tensor of dimension dim = 2 */
80 :
81 405740 : static inline void initialize_tensor_2(struct tensor_ *a, int n1, int n2) {
82 109145 : if (a == NULL)
83 0 : return;
84 :
85 405740 : int size_[2] = {n1, n2};
86 405740 : initialize_tensor(a, 2, size_);
87 : }
88 :
89 : /* initialize a tensor structure for a tensor of dimension dim = 2 */
90 :
91 538892 : static inline void initialize_tensor_3(struct tensor_ *a, int n1, int n2,
92 : int n3) {
93 402572 : if (a == NULL)
94 0 : return;
95 538892 : int size_[3] = {n1, n2, n3};
96 470732 : initialize_tensor(a, 3, size_);
97 : }
98 :
99 : /* initialize a tensor structure for a tensor of dimension dim = 2 */
100 :
101 88000 : static inline void initialize_tensor_4(struct tensor_ *a, int n1, int n2,
102 : int n3, int n4) {
103 88000 : if (a == NULL)
104 0 : return;
105 88000 : int size_[4] = {n1, n2, n3, n4};
106 88000 : initialize_tensor(a, 4, size_);
107 : }
108 :
109 : /* initialize a tensor structure for a tensor of dimension dim = 2 */
110 :
111 : static inline tensor *create_tensor(const int dim, const int *sizes) {
112 : tensor *a = (tensor *)malloc(sizeof(struct tensor_));
113 :
114 : if (a == NULL)
115 : abort();
116 :
117 : initialize_tensor(a, dim, sizes);
118 : a->data = (double *)malloc(sizeof(double) * a->alloc_size_);
119 : if (a->data == NULL)
120 : abort();
121 : a->old_alloc_size_ = a->alloc_size_;
122 : return a;
123 : }
124 :
125 : /* destroy a tensor created with the function above */
126 : static inline void destroy_tensor(tensor *a) {
127 : if (a->data)
128 : free(a->data);
129 : free(a);
130 : }
131 :
132 : static inline size_t tensor_return_memory_size(const struct tensor_ *const a) {
133 : if (a == NULL)
134 : abort();
135 :
136 : return a->alloc_size_;
137 : }
138 :
139 : static inline void tensor_assign_memory(struct tensor_ *a, void *data) {
140 : if (a == NULL)
141 : abort();
142 : a->data = (double *)data;
143 : }
144 :
145 : static inline int tensor_get_leading_dimension(struct tensor_ *a) {
146 : if (a == NULL)
147 : abort();
148 : return a->ld_;
149 : }
150 :
151 : static inline void tensor_set_leading_dimension(struct tensor_ *a,
152 : const int ld) {
153 : if (a == NULL)
154 : abort();
155 : a->ld_ = ld;
156 : }
157 :
158 : static inline void recompute_tensor_offsets(struct tensor_ *a) {
159 : if (a == NULL)
160 : abort();
161 :
162 : switch (a->dim_) {
163 : case 5: {
164 : a->offsets[0] = a->ld_ * a->size[1] * a->size[2] * a->size[3];
165 : a->offsets[1] = a->ld_ * a->size[1] * a->size[2];
166 : a->offsets[2] = a->ld_ * a->size[2];
167 : a->offsets[3] = a->ld_;
168 : break;
169 : }
170 : case 4: {
171 : a->offsets[0] = a->ld_ * a->size[1] * a->size[2];
172 : a->offsets[1] = a->ld_ * a->size[2];
173 : a->offsets[2] = a->ld_;
174 : break;
175 : }
176 : case 3: {
177 : a->offsets[0] = a->ld_ * a->size[1];
178 : a->offsets[1] = a->ld_;
179 : } break;
180 : case 2: { // matrix case
181 : a->offsets[0] = a->ld_;
182 : } break;
183 : case 1:
184 : break;
185 : }
186 : }
187 :
188 88000 : static inline size_t compute_memory_space_tensor_3(const int n1, const int n2,
189 : const int n3) {
190 88000 : return (n1 * n2 * n3);
191 : }
192 :
193 0 : static inline size_t compute_memory_space_tensor_4(const int n1, const int n2,
194 : const int n3, const int n4) {
195 0 : return (n1 * n2 * n3 * n4);
196 : }
197 :
198 1256 : static inline void setup_global_grid_size(tensor *const grid,
199 : const int *const full_size) {
200 1256 : switch (grid->dim_) {
201 0 : case 1:
202 0 : grid->full_size[0] = full_size[0];
203 0 : break;
204 0 : case 2: {
205 0 : grid->full_size[1] = full_size[0];
206 0 : grid->full_size[0] = full_size[1];
207 0 : } break;
208 1256 : case 3: {
209 1256 : grid->full_size[0] = full_size[2];
210 1256 : grid->full_size[1] = full_size[1];
211 1256 : grid->full_size[2] = full_size[0];
212 1256 : } break;
213 : default:
214 0 : for (int d = 0; d < grid->dim_; d++)
215 0 : grid->full_size[d] = full_size[grid->dim_ - d - 1];
216 : break;
217 : }
218 1256 : }
219 :
220 0 : static inline void setup_grid_window(tensor *const grid,
221 : const int *const shift_local,
222 : const int *const border_width,
223 : const int border_mask) {
224 0 : for (int d = 0; d < grid->dim_; d++) {
225 0 : grid->lower_corner[d] = shift_local[grid->dim_ - d - 1];
226 0 : grid->window_shift[d] = 0;
227 0 : grid->window_size[d] = grid->size[d];
228 0 : if (grid->size[d] != grid->full_size[d]) {
229 0 : grid->window_size[d]--;
230 : }
231 : }
232 :
233 0 : if (border_width) {
234 0 : if (border_mask & (1 << 0))
235 0 : grid->window_shift[2] += border_width[0];
236 0 : if (border_mask & (1 << 1))
237 0 : grid->window_size[2] -= border_width[0];
238 0 : if (border_mask & (1 << 2))
239 0 : grid->window_shift[1] += border_width[1];
240 0 : if (border_mask & (1 << 3))
241 0 : grid->window_size[1] -= border_width[1];
242 0 : if (border_mask & (1 << 4))
243 0 : grid->window_shift[0] += border_width[2];
244 0 : if (border_mask & (1 << 5))
245 0 : grid->window_size[0] -= border_width[2];
246 : }
247 0 : }
248 :
249 : extern size_t realloc_tensor(tensor *t);
250 : extern void alloc_tensor(tensor *t);
251 :
252 : #define idx5(a, i, j, k, l, m) \
253 : a.data[(i) * a.offsets[0] + (j) * a.offsets[1] + (k) * a.offsets[2] + \
254 : (l) * a.ld_ + m]
255 : #define idx4(a, i, j, k, l) \
256 : a.data[(i) * a.offsets[0] + (j) * a.offsets[1] + (k) * a.ld_ + (l)]
257 : #define idx3(a, i, j, k) a.data[(i) * a.offsets[0] + (j) * a.ld_ + (k)]
258 : #define idx2(a, i, j) a.data[(i) * a.ld_ + (j)]
259 : #endif
|