Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief represent the structure of a full matrix
10 : !> \par History
11 : !> 08.2002 created [fawzi]
12 : !> \author Fawzi Mohamed
13 : ! **************************************************************************************************
14 : MODULE cp_fm_struct
15 : USE cp_blacs_env, ONLY: cp_blacs_env_release,&
16 : cp_blacs_env_type
17 : USE cp_log_handling, ONLY: cp_get_default_logger,&
18 : cp_logger_get_default_unit_nr,&
19 : cp_logger_type,&
20 : cp_to_string
21 : USE kinds, ONLY: dp
22 : USE machine, ONLY: m_flush
23 : USE message_passing, ONLY: mp_para_env_release,&
24 : mp_para_env_type
25 : #include "../base/base_uses.f90"
26 :
27 : IMPLICIT NONE
28 : PRIVATE
29 :
30 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
31 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_struct'
32 :
33 : ! the default blacs block sizes
34 : ! consider using #ifdefs to give them the optimal values
35 : ! these can be changed using scf_control
36 : ! *** these are used by default
37 : INTEGER, PRIVATE :: optimal_blacs_col_block_size = 32
38 : INTEGER, PRIVATE :: optimal_blacs_row_block_size = 32
39 : LOGICAL, PRIVATE :: force_block_size = .FALSE.
40 :
41 : PUBLIC :: cp_fm_struct_type, cp_fm_struct_p_type
42 : PUBLIC :: cp_fm_struct_create, cp_fm_struct_retain, cp_fm_struct_release, &
43 : cp_fm_struct_equivalent, &
44 : cp_fm_struct_get, cp_fm_struct_double, cp_fm_struct_config, &
45 : cp_fm_struct_get_nrow_block, cp_fm_struct_get_ncol_block, &
46 : cp_fm_struct_write_info
47 :
48 : ! **************************************************************************************************
49 : !> \brief keeps the information about the structure of a full matrix
50 : !> \param para_env the parallel environment of the matrices with this structure
51 : !> \param context the blacs context (parallel environment for scalapack),
52 : !> should be compatible with para_env
53 : !> \param descriptor the scalapack descriptor of the matrices, when using
54 : !> scalapack (ncol_block=descriptor(6), ncol_global=descriptor(4),
55 : !> nrow_block=descriptor(5), nrow_global=descriptor(3))
56 : !> \param ncol_block number of columns of a scalapack block
57 : !> \param nrow_block number of rows of a scalapack block
58 : !> \param nrow_global number of rows of the matrix
59 : !> \param ncol_global number of rows
60 : !> \param first_p_pos position of the first processor (for scalapack)
61 : !> \param row_indices real (global) indices of the rows (defined only for
62 : !> the local rows really used)
63 : !> \param col_indices real (global) indices of the cols (defined only for
64 : !> the local cols really used)
65 : !> \param nrow_locals nrow_locals(i) number of local rows of the matrix really
66 : !> used on the processors with context%mepos(1)==i
67 : !> \param ncol_locals ncol_locals(i) number of local rows of the matrix really
68 : !> used on the processors with context%mepos(2)==i
69 : !> \param ref_count reference count (see doc/ReferenceCounting.html)
70 : !> \param local_leading_dimension leading dimension of the data that is
71 : !> stored on this processor
72 : !>
73 : !> readonly attributes:
74 : !> \param nrow_local number of local rows really used on the actual processor
75 : !> \param ncol_local number of local cols really used on the actual processor
76 : !> \note
77 : !> use cp_fm_struct_get to extract information from this structure
78 : !> \par History
79 : !> 08.2002 created [fawzi]
80 : !> \author Fawzi Mohamed
81 : ! **************************************************************************************************
82 : TYPE cp_fm_struct_type
83 : TYPE(mp_para_env_type), POINTER :: para_env => NULL()
84 : TYPE(cp_blacs_env_type), POINTER :: context => NULL()
85 : INTEGER, DIMENSION(9) :: descriptor = -1
86 : INTEGER :: nrow_block = -1, ncol_block = -1, nrow_global = -1, ncol_global = -1
87 : INTEGER, DIMENSION(2) :: first_p_pos = -1
88 : INTEGER, DIMENSION(:), POINTER :: row_indices => NULL(), col_indices => NULL(), &
89 : nrow_locals => NULL(), ncol_locals => NULL()
90 : INTEGER :: ref_count = -1, local_leading_dimension = -1
91 : CONTAINS
92 : PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2p_row => cp_fm_indxg2p_row
93 : PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2p_col => cp_fm_indxg2p_col
94 : PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2l_row => cp_fm_indxg2l_row
95 : PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2l_col => cp_fm_indxg2l_col
96 : PROCEDURE, PASS(struct), NON_OVERRIDABLE :: l2g_row => cp_fm_indxl2g_row
97 : PROCEDURE, PASS(struct), NON_OVERRIDABLE :: l2g_col => cp_fm_indxl2g_col
98 : END TYPE cp_fm_struct_type
99 : ! **************************************************************************************************
100 : TYPE cp_fm_struct_p_type
101 : TYPE(cp_fm_struct_type), POINTER :: struct => NULL()
102 : END TYPE cp_fm_struct_p_type
103 :
104 : CONTAINS
105 :
106 : ! **************************************************************************************************
107 : !> \brief allocates and initializes a full matrix structure
108 : !> \param fmstruct the pointer that will point to the new structure
109 : !> \param para_env the parallel environment
110 : !> \param context the blacs context of this matrix
111 : !> \param nrow_global the number of row of the full matrix
112 : !> \param ncol_global the number of columns of the full matrix
113 : !> \param nrow_block the number of rows of a block of the matrix,
114 : !> omit or set to -1 to use the built-in defaults
115 : !> \param ncol_block the number of columns of a block of the matrix,
116 : !> omit or set to -1 to use the built-in defaults
117 : !> \param descriptor the scalapack descriptor of the matrix (if not given
118 : !> a new one is allocated
119 : !> \param first_p_pos ...
120 : !> \param local_leading_dimension the leading dimension of the locally stored
121 : !> data block
122 : !> \param template_fmstruct a matrix structure where to take the default values
123 : !> \param square_blocks ...
124 : !> \param force_block ...
125 : !> \par History
126 : !> 08.2002 created [fawzi]
127 : !> \author Fawzi Mohamed
128 : ! **************************************************************************************************
129 462120 : SUBROUTINE cp_fm_struct_create(fmstruct, para_env, context, nrow_global, &
130 : ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, &
131 : local_leading_dimension, template_fmstruct, square_blocks, force_block)
132 :
133 : TYPE(cp_fm_struct_type), POINTER :: fmstruct
134 : TYPE(mp_para_env_type), TARGET, OPTIONAL :: para_env
135 : INTEGER, INTENT(in), OPTIONAL :: nrow_global, ncol_global
136 : INTEGER, INTENT(in), OPTIONAL :: nrow_block, ncol_block
137 : INTEGER, INTENT(in), OPTIONAL :: local_leading_dimension
138 : TYPE(cp_blacs_env_type), TARGET, OPTIONAL :: context
139 : INTEGER, DIMENSION(9), INTENT(in), OPTIONAL :: descriptor
140 : INTEGER, OPTIONAL, DIMENSION(2) :: first_p_pos
141 : TYPE(cp_fm_struct_type), POINTER, OPTIONAL :: template_fmstruct
142 : LOGICAL, OPTIONAL, INTENT(in) :: square_blocks
143 : LOGICAL, OPTIONAL, INTENT(in) :: force_block
144 :
145 : INTEGER :: dumblock, i
146 : #if defined(__parallel)
147 : INTEGER :: iunit, stat
148 : INTEGER, EXTERNAL :: numroc
149 : TYPE(cp_logger_type), POINTER :: logger
150 : #endif
151 :
152 : LOGICAL :: my_square_blocks, my_force_block
153 :
154 6469680 : ALLOCATE (fmstruct)
155 :
156 462120 : fmstruct%nrow_block = optimal_blacs_row_block_size
157 462120 : fmstruct%ncol_block = optimal_blacs_col_block_size
158 :
159 462120 : IF (.NOT. PRESENT(template_fmstruct)) THEN
160 416424 : CPASSERT(PRESENT(context))
161 416424 : CPASSERT(PRESENT(nrow_global))
162 416424 : CPASSERT(PRESENT(ncol_global))
163 416424 : fmstruct%local_leading_dimension = 1
164 : ELSE
165 45696 : fmstruct%context => template_fmstruct%context
166 45696 : fmstruct%para_env => template_fmstruct%para_env
167 913920 : fmstruct%descriptor = template_fmstruct%descriptor
168 45696 : fmstruct%nrow_block = template_fmstruct%nrow_block
169 45696 : fmstruct%nrow_global = template_fmstruct%nrow_global
170 45696 : fmstruct%ncol_block = template_fmstruct%ncol_block
171 45696 : fmstruct%ncol_global = template_fmstruct%ncol_global
172 274176 : fmstruct%first_p_pos = template_fmstruct%first_p_pos
173 : fmstruct%local_leading_dimension = &
174 45696 : template_fmstruct%local_leading_dimension
175 : END IF
176 :
177 462120 : my_force_block = force_block_size
178 462120 : IF (PRESENT(force_block)) my_force_block = force_block
179 :
180 462120 : IF (PRESENT(context)) THEN
181 416424 : fmstruct%context => context
182 416424 : fmstruct%para_env => context%para_env
183 : END IF
184 462120 : IF (PRESENT(para_env)) fmstruct%para_env => para_env
185 462120 : CALL fmstruct%context%retain()
186 462120 : CALL fmstruct%para_env%retain()
187 :
188 462120 : IF (PRESENT(nrow_global)) THEN
189 459784 : fmstruct%nrow_global = nrow_global
190 459784 : fmstruct%local_leading_dimension = 1
191 : END IF
192 462120 : IF (PRESENT(ncol_global)) THEN
193 461876 : fmstruct%ncol_global = ncol_global
194 : END IF
195 :
196 : ! try to avoid small left-over blocks (anyway naive)
197 462120 : IF (PRESENT(nrow_block)) THEN
198 104838 : IF (nrow_block > 0) & ! allows setting the number of blocks to -1 to explicitly set to auto
199 51454 : fmstruct%nrow_block = nrow_block
200 : END IF
201 462120 : IF (.NOT. my_force_block) THEN
202 : dumblock = CEILING(REAL(fmstruct%nrow_global, KIND=dp)/ &
203 426131 : REAL(fmstruct%context%num_pe(1), KIND=dp))
204 426131 : fmstruct%nrow_block = MAX(1, MIN(fmstruct%nrow_block, dumblock))
205 : END IF
206 462120 : IF (PRESENT(ncol_block)) THEN
207 113118 : IF (ncol_block > 0) & ! allows setting the number of blocks to -1 to explicitly set to auto
208 59734 : fmstruct%ncol_block = ncol_block
209 : END IF
210 462120 : IF (.NOT. my_force_block) THEN
211 : dumblock = CEILING(REAL(fmstruct%ncol_global, KIND=dp)/ &
212 426131 : REAL(fmstruct%context%num_pe(2), KIND=dp))
213 426131 : fmstruct%ncol_block = MAX(1, MIN(fmstruct%ncol_block, dumblock))
214 : END IF
215 :
216 : ! square matrix -> square blocks (otherwise some op fail)
217 462120 : my_square_blocks = fmstruct%nrow_global == fmstruct%ncol_global
218 462120 : IF (PRESENT(square_blocks)) my_square_blocks = square_blocks
219 462120 : IF (my_square_blocks) THEN
220 285459 : fmstruct%nrow_block = MIN(fmstruct%nrow_block, fmstruct%ncol_block)
221 285459 : fmstruct%ncol_block = fmstruct%nrow_block
222 : END IF
223 :
224 : ALLOCATE (fmstruct%nrow_locals(0:(fmstruct%context%num_pe(1) - 1)), &
225 2310600 : fmstruct%ncol_locals(0:(fmstruct%context%num_pe(2) - 1)))
226 462120 : IF (.NOT. PRESENT(template_fmstruct)) &
227 1249272 : fmstruct%first_p_pos = (/0, 0/)
228 462120 : IF (PRESENT(first_p_pos)) fmstruct%first_p_pos = first_p_pos
229 :
230 1295420 : fmstruct%nrow_locals = 0
231 924240 : fmstruct%ncol_locals = 0
232 : #if defined(__parallel)
233 : fmstruct%nrow_locals(fmstruct%context%mepos(1)) = &
234 : numroc(fmstruct%nrow_global, fmstruct%nrow_block, &
235 : fmstruct%context%mepos(1), fmstruct%first_p_pos(1), &
236 462120 : fmstruct%context%num_pe(1))
237 : fmstruct%ncol_locals(fmstruct%context%mepos(2)) = &
238 : numroc(fmstruct%ncol_global, fmstruct%ncol_block, &
239 : fmstruct%context%mepos(2), fmstruct%first_p_pos(2), &
240 462120 : fmstruct%context%num_pe(2))
241 2128720 : CALL fmstruct%para_env%sum(fmstruct%nrow_locals)
242 1386360 : CALL fmstruct%para_env%sum(fmstruct%ncol_locals)
243 1295420 : fmstruct%nrow_locals(:) = fmstruct%nrow_locals(:)/fmstruct%context%num_pe(2)
244 924240 : fmstruct%ncol_locals(:) = fmstruct%ncol_locals(:)/fmstruct%context%num_pe(1)
245 :
246 1757540 : IF (SUM(fmstruct%ncol_locals) .NE. fmstruct%ncol_global .OR. &
247 : SUM(fmstruct%nrow_locals) .NE. fmstruct%nrow_global) THEN
248 : ! try to collect some output if this is going to happen again
249 : ! this seems to trigger on blanc, but should really never happen
250 0 : logger => cp_get_default_logger()
251 0 : iunit = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
252 0 : WRITE (iunit, *) "mepos", fmstruct%context%mepos(1:2), "numpe", fmstruct%context%num_pe(1:2)
253 0 : WRITE (iunit, *) "ncol_global", fmstruct%ncol_global
254 0 : WRITE (iunit, *) "nrow_global", fmstruct%nrow_global
255 0 : WRITE (iunit, *) "ncol_locals", fmstruct%ncol_locals
256 0 : WRITE (iunit, *) "nrow_locals", fmstruct%nrow_locals
257 0 : CALL m_flush(iunit)
258 : END IF
259 :
260 924240 : IF (SUM(fmstruct%ncol_locals) .NE. fmstruct%ncol_global) &
261 0 : CPABORT("sum of local cols not equal global cols")
262 1295420 : IF (SUM(fmstruct%nrow_locals) .NE. fmstruct%nrow_global) &
263 0 : CPABORT("sum of local row not equal global rows")
264 : #else
265 : ! block = full matrix
266 : fmstruct%nrow_block = fmstruct%nrow_global
267 : fmstruct%ncol_block = fmstruct%ncol_global
268 : fmstruct%nrow_locals(fmstruct%context%mepos(1)) = fmstruct%nrow_global
269 : fmstruct%ncol_locals(fmstruct%context%mepos(2)) = fmstruct%ncol_global
270 : #endif
271 :
272 : fmstruct%local_leading_dimension = MAX(fmstruct%local_leading_dimension, &
273 462120 : fmstruct%nrow_locals(fmstruct%context%mepos(1)))
274 462120 : IF (PRESENT(local_leading_dimension)) THEN
275 0 : IF (MAX(1, fmstruct%nrow_locals(fmstruct%context%mepos(1))) > local_leading_dimension) &
276 : CALL cp_abort(__LOCATION__, "local_leading_dimension too small ("// &
277 : cp_to_string(local_leading_dimension)//"<"// &
278 0 : cp_to_string(fmstruct%local_leading_dimension)//")")
279 0 : fmstruct%local_leading_dimension = local_leading_dimension
280 : END IF
281 :
282 462120 : NULLIFY (fmstruct%row_indices, fmstruct%col_indices)
283 :
284 : ! the max should go away
285 1386360 : ALLOCATE (fmstruct%row_indices(MAX(fmstruct%nrow_locals(fmstruct%context%mepos(1)), 1)))
286 5933321 : DO i = 1, SIZE(fmstruct%row_indices)
287 : #ifdef __parallel
288 5933321 : fmstruct%row_indices(i) = fmstruct%l2g_row(i, fmstruct%context%mepos(1))
289 : #else
290 : fmstruct%row_indices(i) = i
291 : #endif
292 : END DO
293 1386360 : ALLOCATE (fmstruct%col_indices(MAX(fmstruct%ncol_locals(fmstruct%context%mepos(2)), 1)))
294 6203051 : DO i = 1, SIZE(fmstruct%col_indices)
295 : #ifdef __parallel
296 6203051 : fmstruct%col_indices(i) = fmstruct%l2g_col(i, fmstruct%context%mepos(2))
297 : #else
298 : fmstruct%col_indices(i) = i
299 : #endif
300 : END DO
301 :
302 462120 : fmstruct%ref_count = 1
303 :
304 462120 : IF (PRESENT(descriptor)) THEN
305 0 : fmstruct%descriptor = descriptor
306 : ELSE
307 4621200 : fmstruct%descriptor = 0
308 : #if defined(__parallel)
309 : ! local leading dimension needs to be at least 1
310 : CALL descinit(fmstruct%descriptor, fmstruct%nrow_global, &
311 : fmstruct%ncol_global, fmstruct%nrow_block, &
312 : fmstruct%ncol_block, fmstruct%first_p_pos(1), &
313 : fmstruct%first_p_pos(2), fmstruct%context, &
314 462120 : fmstruct%local_leading_dimension, stat)
315 462120 : CPASSERT(stat == 0)
316 : #endif
317 : END IF
318 462120 : END SUBROUTINE cp_fm_struct_create
319 :
320 : ! **************************************************************************************************
321 : !> \brief retains a full matrix structure
322 : !> \param fmstruct the structure to retain
323 : !> \par History
324 : !> 08.2002 created [fawzi]
325 : !> \author Fawzi Mohamed
326 : ! **************************************************************************************************
327 1465265 : SUBROUTINE cp_fm_struct_retain(fmstruct)
328 : TYPE(cp_fm_struct_type), INTENT(INOUT) :: fmstruct
329 :
330 1465265 : CPASSERT(fmstruct%ref_count > 0)
331 1465265 : fmstruct%ref_count = fmstruct%ref_count + 1
332 1465265 : END SUBROUTINE cp_fm_struct_retain
333 :
334 : ! **************************************************************************************************
335 : !> \brief releases a full matrix structure
336 : !> \param fmstruct the structure to release
337 : !> \par History
338 : !> 08.2002 created [fawzi]
339 : !> \author Fawzi Mohamed
340 : ! **************************************************************************************************
341 1955236 : SUBROUTINE cp_fm_struct_release(fmstruct)
342 : TYPE(cp_fm_struct_type), POINTER :: fmstruct
343 :
344 1955236 : IF (ASSOCIATED(fmstruct)) THEN
345 1927385 : CPASSERT(fmstruct%ref_count > 0)
346 1927385 : fmstruct%ref_count = fmstruct%ref_count - 1
347 1927385 : IF (fmstruct%ref_count < 1) THEN
348 462120 : CALL cp_blacs_env_release(fmstruct%context)
349 462120 : CALL mp_para_env_release(fmstruct%para_env)
350 462120 : IF (ASSOCIATED(fmstruct%row_indices)) THEN
351 462120 : DEALLOCATE (fmstruct%row_indices)
352 : END IF
353 462120 : IF (ASSOCIATED(fmstruct%col_indices)) THEN
354 462120 : DEALLOCATE (fmstruct%col_indices)
355 : END IF
356 462120 : IF (ASSOCIATED(fmstruct%nrow_locals)) THEN
357 462120 : DEALLOCATE (fmstruct%nrow_locals)
358 : END IF
359 462120 : IF (ASSOCIATED(fmstruct%ncol_locals)) THEN
360 462120 : DEALLOCATE (fmstruct%ncol_locals)
361 : END IF
362 462120 : DEALLOCATE (fmstruct)
363 : END IF
364 : END IF
365 1955236 : NULLIFY (fmstruct)
366 1955236 : END SUBROUTINE cp_fm_struct_release
367 :
368 : ! **************************************************************************************************
369 : !> \brief returns true if the two matrix structures are equivalent, false
370 : !> otherwise.
371 : !> \param fmstruct1 one of the full matrix structures to compare
372 : !> \param fmstruct2 the second of the full matrix structures to compare
373 : !> \return ...
374 : !> \par History
375 : !> 08.2002 created [fawzi]
376 : !> \author Fawzi Mohamed
377 : ! **************************************************************************************************
378 2278397 : FUNCTION cp_fm_struct_equivalent(fmstruct1, fmstruct2) RESULT(res)
379 : TYPE(cp_fm_struct_type), POINTER :: fmstruct1, fmstruct2
380 : LOGICAL :: res
381 :
382 : INTEGER :: i
383 :
384 2278397 : CPASSERT(ASSOCIATED(fmstruct1))
385 2278397 : CPASSERT(ASSOCIATED(fmstruct2))
386 2278397 : CPASSERT(fmstruct1%ref_count > 0)
387 2278397 : CPASSERT(fmstruct2%ref_count > 0)
388 2278397 : IF (ASSOCIATED(fmstruct1, fmstruct2)) THEN
389 : res = .TRUE.
390 : ELSE
391 : res = (fmstruct1%context == fmstruct2%context) .AND. &
392 : (fmstruct1%nrow_global == fmstruct2%nrow_global) .AND. &
393 : (fmstruct1%ncol_global == fmstruct2%ncol_global) .AND. &
394 : (fmstruct1%nrow_block == fmstruct2%nrow_block) .AND. &
395 : (fmstruct1%ncol_block == fmstruct2%ncol_block) .AND. &
396 : (fmstruct1%local_leading_dimension == &
397 460429 : fmstruct2%local_leading_dimension)
398 4604290 : DO i = 1, 9
399 4604290 : res = res .AND. (fmstruct1%descriptor(i) == fmstruct1%descriptor(i))
400 : END DO
401 : END IF
402 2278397 : END FUNCTION cp_fm_struct_equivalent
403 :
404 : ! **************************************************************************************************
405 : !> \brief returns the values of various attributes of the matrix structure
406 : !> \param fmstruct the structure you want info about
407 : !> \param para_env ...
408 : !> \param context ...
409 : !> \param descriptor ...
410 : !> \param ncol_block ...
411 : !> \param nrow_block ...
412 : !> \param nrow_global ...
413 : !> \param ncol_global ...
414 : !> \param first_p_pos ...
415 : !> \param row_indices ...
416 : !> \param col_indices ...
417 : !> \param nrow_local ...
418 : !> \param ncol_local ...
419 : !> \param nrow_locals ...
420 : !> \param ncol_locals ...
421 : !> \param local_leading_dimension ...
422 : !> \par History
423 : !> 08.2002 created [fawzi]
424 : !> \author Fawzi Mohamed
425 : ! **************************************************************************************************
426 5995920 : SUBROUTINE cp_fm_struct_get(fmstruct, para_env, context, &
427 : descriptor, ncol_block, nrow_block, nrow_global, &
428 : ncol_global, first_p_pos, row_indices, &
429 : col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, &
430 : local_leading_dimension)
431 : TYPE(cp_fm_struct_type), INTENT(IN) :: fmstruct
432 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
433 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
434 : INTEGER, DIMENSION(9), INTENT(OUT), OPTIONAL :: descriptor
435 : INTEGER, INTENT(out), OPTIONAL :: ncol_block, nrow_block, nrow_global, &
436 : ncol_global
437 : INTEGER, DIMENSION(2), INTENT(out), OPTIONAL :: first_p_pos
438 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
439 : INTEGER, INTENT(out), OPTIONAL :: nrow_local, ncol_local
440 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
441 : INTEGER, INTENT(out), OPTIONAL :: local_leading_dimension
442 :
443 5995920 : IF (PRESENT(para_env)) para_env => fmstruct%para_env
444 5995920 : IF (PRESENT(context)) context => fmstruct%context
445 5995920 : IF (PRESENT(descriptor)) descriptor = fmstruct%descriptor
446 5995920 : IF (PRESENT(ncol_block)) ncol_block = fmstruct%ncol_block
447 5995920 : IF (PRESENT(nrow_block)) nrow_block = fmstruct%nrow_block
448 5995920 : IF (PRESENT(nrow_global)) nrow_global = fmstruct%nrow_global
449 5995920 : IF (PRESENT(ncol_global)) ncol_global = fmstruct%ncol_global
450 5995920 : IF (PRESENT(first_p_pos)) first_p_pos = fmstruct%first_p_pos
451 5995920 : IF (PRESENT(nrow_locals)) nrow_locals => fmstruct%nrow_locals
452 5995920 : IF (PRESENT(ncol_locals)) ncol_locals => fmstruct%ncol_locals
453 5995920 : IF (PRESENT(local_leading_dimension)) local_leading_dimension = &
454 35129 : fmstruct%local_leading_dimension
455 :
456 5995920 : IF (PRESENT(nrow_local)) nrow_local = fmstruct%nrow_locals(fmstruct%context%mepos(1))
457 5995920 : IF (PRESENT(ncol_local)) ncol_local = fmstruct%ncol_locals(fmstruct%context%mepos(2))
458 :
459 5995920 : IF (PRESENT(row_indices)) row_indices => fmstruct%row_indices
460 5995920 : IF (PRESENT(col_indices)) col_indices => fmstruct%col_indices
461 5995920 : END SUBROUTINE cp_fm_struct_get
462 :
463 : ! **************************************************************************************************
464 : !> \brief Write nicely formatted info about the FM struct to the given I/O unit
465 : !> \param fmstruct a cp_fm_struct_type instance
466 : !> \param io_unit the I/O unit to use for writing
467 : ! **************************************************************************************************
468 3 : SUBROUTINE cp_fm_struct_write_info(fmstruct, io_unit)
469 : TYPE(cp_fm_struct_type), INTENT(IN) :: fmstruct
470 : INTEGER, INTENT(IN) :: io_unit
471 :
472 : INTEGER, PARAMETER :: oblock_size = 8
473 :
474 : CHARACTER(len=30) :: fm
475 : INTEGER :: oblock
476 :
477 3 : WRITE (fm, "(A,I2,A)") "(A,I5,A,I5,A,", oblock_size, "I6)"
478 :
479 3 : WRITE (io_unit, '(A,I12)') "CP_FM_STRUCT | No. of matrix columns: ", fmstruct%ncol_global
480 3 : WRITE (io_unit, '(A,I12)') "CP_FM_STRUCT | No. of matrix rows: ", fmstruct%nrow_global
481 3 : WRITE (io_unit, '(A,I12)') "CP_FM_STRUCT | No. of block columns: ", fmstruct%ncol_block
482 3 : WRITE (io_unit, '(A,I12)') "CP_FM_STRUCT | No. of block rows: ", fmstruct%nrow_block
483 :
484 3 : WRITE (io_unit, '(A)') "CP_FM_STRUCT | Number of local columns: "
485 6 : DO oblock = 0, (SIZE(fmstruct%ncol_locals) - 1)/oblock_size
486 3 : WRITE (io_unit, fm) "CP_FM_STRUCT | CPUs ", &
487 3 : oblock*oblock_size, "..", (oblock + 1)*oblock_size - 1, ": ", &
488 9 : fmstruct%ncol_locals(oblock*oblock_size:MIN(SIZE(fmstruct%ncol_locals), (oblock + 1)*oblock_size) - 1)
489 : END DO
490 :
491 3 : WRITE (io_unit, '(A)') "CP_FM_STRUCT | Number of local rows: "
492 6 : DO oblock = 0, (SIZE(fmstruct%nrow_locals) - 1)/oblock_size
493 3 : WRITE (io_unit, fm) "CP_FM_STRUCT | CPUs ", &
494 3 : oblock*oblock_size, "..", (oblock + 1)*oblock_size - 1, ": ", &
495 9 : fmstruct%nrow_locals(oblock*oblock_size:MIN(SIZE(fmstruct%nrow_locals), (oblock + 1)*oblock_size) - 1)
496 : END DO
497 3 : END SUBROUTINE cp_fm_struct_write_info
498 :
499 : ! **************************************************************************************************
500 : !> \brief creates a struct with twice the number of blocks on each core.
501 : !> If matrix A has to be multiplied with B anc C, a
502 : !> significant speedup of pdgemm can be acchieved by joining the matrices
503 : !> in a new one with this structure (see arnoldi in rt_matrix_exp)
504 : !> \param fmstruct the struct to create
505 : !> \param struct struct of either A or B
506 : !> \param context ...
507 : !> \param col in which direction the matrix should be enlarged
508 : !> \param row in which direction the matrix should be enlarged
509 : !> \par History
510 : !> 06.2009 created [fschiff]
511 : !> \author Florian Schiffmann
512 : ! **************************************************************************************************
513 5202 : SUBROUTINE cp_fm_struct_double(fmstruct, struct, context, col, row)
514 : TYPE(cp_fm_struct_type), POINTER :: fmstruct
515 : TYPE(cp_fm_struct_type), INTENT(INOUT) :: struct
516 : TYPE(cp_blacs_env_type), INTENT(INOUT), TARGET :: context
517 : LOGICAL, INTENT(in) :: col, row
518 :
519 : INTEGER :: n_doubled_items_in_partially_filled_block, ncol_block, ncol_global, newdim_col, &
520 : newdim_row, nfilled_blocks, nfilled_blocks_remain, nprocs_col, nprocs_row, nrow_block, &
521 : nrow_global
522 : TYPE(mp_para_env_type), POINTER :: para_env
523 :
524 : CALL cp_fm_struct_get(struct, nrow_global=nrow_global, &
525 : ncol_global=ncol_global, nrow_block=nrow_block, &
526 5202 : ncol_block=ncol_block)
527 5202 : newdim_row = nrow_global
528 5202 : newdim_col = ncol_global
529 5202 : nprocs_row = context%num_pe(1)
530 5202 : nprocs_col = context%num_pe(2)
531 5202 : para_env => struct%para_env
532 :
533 5202 : IF (col) THEN
534 5202 : IF (ncol_global == 0) THEN
535 120 : newdim_col = 0
536 : ELSE
537 : ! ncol_block nfilled_blocks_remain * ncol_block
538 : ! |<--->| |<--->|
539 : ! |-----|-----|-----|-----|---|
540 : ! | 0 | 1 | 2 | 0 | 1 | <- context%mepos(2)
541 : ! |-----|-----|-----|-----|---|
542 : ! |<--- nfilled_blocks -->|<-> -- items (columns) in partially filled blocks
543 : ! | * ncol_block |
544 5082 : n_doubled_items_in_partially_filled_block = 2*MOD(ncol_global, ncol_block)
545 5082 : nfilled_blocks = ncol_global/ncol_block
546 5082 : nfilled_blocks_remain = MOD(nfilled_blocks, nprocs_col)
547 5082 : newdim_col = 2*(nfilled_blocks/nprocs_col)
548 5082 : IF (n_doubled_items_in_partially_filled_block > ncol_block) THEN
549 : ! doubled number of columns in a partially filled block does not fit into a single block.
550 : ! Due to cyclic distribution of ScaLAPACK blocks, an extra block for each core needs to be added
551 : ! |-----|-----|-----|----| |-----|-----|-----|-----|-----|-----|-----|-----|-----|---|
552 : ! | 0 | 1 | 2 | 0 | --> | 0 | 1 | 2 | 0 | 1 | 2 | 0 | 1 | 2 | 0|
553 : ! |-----|-----|-----|----| |-----|-----|-----|-----|-----|-----|-----|-----|-----|---|
554 : ! a a a b a1 a1 a1 a2 a2 a2 b1 empty empty b2
555 352 : newdim_col = newdim_col + 1
556 :
557 : ! the number of columns which does not fit into the added extra block
558 352 : n_doubled_items_in_partially_filled_block = n_doubled_items_in_partially_filled_block - ncol_block
559 4730 : ELSE IF (nfilled_blocks_remain > 0) THEN
560 : ! |-----|-----|-----|-----|--| |-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
561 : ! | 0 | 1 | 2 | 0 | 1| -> | 0 | 1 | 2 | 0 | 1 | 2 | 0 | 1 | 2 | 0 |
562 : ! |-----|-----|-----|-----|--| |-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
563 : ! a a a b b a1 a1 a1 a2 a2 a2 b1 b1 b2 empty b2
564 0 : newdim_col = newdim_col + 1
565 0 : n_doubled_items_in_partially_filled_block = 0
566 : END IF
567 :
568 5082 : newdim_col = (newdim_col*nprocs_col + nfilled_blocks_remain)*ncol_block + n_doubled_items_in_partially_filled_block
569 : END IF
570 : END IF
571 :
572 5202 : IF (row) THEN
573 0 : IF (nrow_global == 0) THEN
574 0 : newdim_row = 0
575 : ELSE
576 0 : n_doubled_items_in_partially_filled_block = 2*MOD(nrow_global, nrow_block)
577 0 : nfilled_blocks = nrow_global/nrow_block
578 0 : nfilled_blocks_remain = MOD(nfilled_blocks, nprocs_row)
579 0 : newdim_row = 2*(nfilled_blocks/nprocs_row)
580 0 : IF (n_doubled_items_in_partially_filled_block > nrow_block) THEN
581 0 : newdim_row = newdim_row + 1
582 0 : n_doubled_items_in_partially_filled_block = n_doubled_items_in_partially_filled_block - nrow_block
583 0 : ELSE IF (nfilled_blocks_remain > 0) THEN
584 0 : newdim_row = newdim_row + 1
585 0 : n_doubled_items_in_partially_filled_block = 0
586 : END IF
587 :
588 0 : newdim_row = (newdim_row*nprocs_row + nfilled_blocks_remain)*nrow_block + n_doubled_items_in_partially_filled_block
589 : END IF
590 : END IF
591 :
592 : ! square_blocks=.FALSE. ensures that matrix blocks of the doubled matrix will have
593 : ! nrow_block x ncol_block shape even in case of a square doubled matrix
594 : CALL cp_fm_struct_create(fmstruct=fmstruct, para_env=para_env, &
595 : context=context, &
596 : nrow_global=newdim_row, &
597 : ncol_global=newdim_col, &
598 : ncol_block=ncol_block, &
599 : nrow_block=nrow_block, &
600 5202 : square_blocks=.FALSE.)
601 :
602 5202 : END SUBROUTINE cp_fm_struct_double
603 : ! **************************************************************************************************
604 : !> \brief allows to modify the default settings for matrix creation
605 : !> \param nrow_block ...
606 : !> \param ncol_block ...
607 : !> \param force_block ...
608 : ! **************************************************************************************************
609 9127 : SUBROUTINE cp_fm_struct_config(nrow_block, ncol_block, force_block)
610 : INTEGER, INTENT(IN), OPTIONAL :: nrow_block, ncol_block
611 : LOGICAL, INTENT(IN), OPTIONAL :: force_block
612 :
613 9127 : IF (PRESENT(ncol_block)) optimal_blacs_col_block_size = ncol_block
614 9127 : IF (PRESENT(nrow_block)) optimal_blacs_row_block_size = nrow_block
615 9127 : IF (PRESENT(force_block)) force_block_size = force_block
616 :
617 9127 : END SUBROUTINE cp_fm_struct_config
618 :
619 : ! **************************************************************************************************
620 : !> \brief ...
621 : !> \return ...
622 : ! **************************************************************************************************
623 0 : FUNCTION cp_fm_struct_get_nrow_block() RESULT(res)
624 : INTEGER :: res
625 :
626 0 : res = optimal_blacs_row_block_size
627 0 : END FUNCTION cp_fm_struct_get_nrow_block
628 :
629 : ! **************************************************************************************************
630 : !> \brief ...
631 : !> \return ...
632 : ! **************************************************************************************************
633 0 : FUNCTION cp_fm_struct_get_ncol_block() RESULT(res)
634 : INTEGER :: res
635 :
636 0 : res = optimal_blacs_col_block_size
637 0 : END FUNCTION cp_fm_struct_get_ncol_block
638 :
639 : ! **************************************************************************************************
640 : !> \brief wrapper to scalapack function INDXG2P that computes the row process
641 : !> coordinate which possesses the entry of a distributed matrix specified
642 : !> by a global index INDXGLOB.
643 : !> \param struct ...
644 : !> \param INDXGLOB ...
645 : !> \return ...
646 : !> \author Mauro Del Ben [MDB] - 12.2012, modified by F. Stein
647 : ! **************************************************************************************************
648 10256215 : FUNCTION cp_fm_indxg2p_row(struct, INDXGLOB) RESULT(G2P)
649 : CLASS(cp_fm_struct_type), INTENT(IN) :: struct
650 : INTEGER, INTENT(IN) :: INDXGLOB
651 : INTEGER :: G2P
652 :
653 : #if defined(__parallel)
654 : INTEGER :: number_of_process_rows
655 : INTEGER, EXTERNAL :: indxg2p
656 : #endif
657 :
658 : #if defined(__parallel)
659 :
660 10256215 : CALL struct%context%get(number_of_process_rows=number_of_process_rows)
661 :
662 10256215 : G2P = indxg2p(INDXGLOB, struct%nrow_block, 0, struct%first_p_pos(1), number_of_process_rows)
663 :
664 : #else
665 : MARK_USED(struct)
666 : MARK_USED(indxglob)
667 :
668 : G2P = 0
669 :
670 : #endif
671 :
672 10256215 : END FUNCTION cp_fm_indxg2p_row
673 :
674 : ! **************************************************************************************************
675 : !> \brief wrapper to scalapack function INDXG2P that computes the col process
676 : !> coordinate which possesses the entry of a distributed matrix specified
677 : !> by a global index INDXGLOB.
678 : !> \param struct ...
679 : !> \param INDXGLOB ...
680 : !> \return ...
681 : !> \author Mauro Del Ben [MDB] - 12.2012, modified by F. Stein
682 : ! **************************************************************************************************
683 7230388 : FUNCTION cp_fm_indxg2p_col(struct, INDXGLOB) RESULT(G2P)
684 : CLASS(cp_fm_struct_type), INTENT(IN) :: struct
685 : INTEGER, INTENT(IN) :: INDXGLOB
686 : INTEGER :: G2P
687 :
688 : #if defined(__parallel)
689 : INTEGER :: number_of_process_columns
690 : INTEGER, EXTERNAL :: indxg2p
691 : #endif
692 :
693 : #if defined(__parallel)
694 :
695 7230388 : CALL struct%context%get(number_of_process_columns=number_of_process_columns)
696 :
697 7230388 : G2P = indxg2p(INDXGLOB, struct%ncol_block, 0, struct%first_p_pos(2), number_of_process_columns)
698 :
699 : #else
700 : MARK_USED(struct)
701 : MARK_USED(indxglob)
702 :
703 : G2P = 0
704 :
705 : #endif
706 :
707 7230388 : END FUNCTION cp_fm_indxg2p_col
708 :
709 : ! **************************************************************************************************
710 : !> \brief wrapper to scalapack function INDXG2L that computes the local index
711 : !> of a distributed matrix entry pointed to by the global index INDXGLOB.
712 : !>
713 : !> Arguments
714 : !> =========
715 : !>
716 : !> INDXGLOB (global input) INTEGER
717 : !> The global index of the distributed matrix entry.
718 : !>
719 : !> NB (global input) INTEGER
720 : !> Block size, size of the blocks the distributed matrix is
721 : !> split into.
722 : !>
723 : !> IPROC (local dummy) INTEGER
724 : !> Dummy argument in this case in order to unify the calling
725 : !> sequence of the tool-routines.
726 : !>
727 : !> ISRCPROC (local dummy) INTEGER
728 : !> Dummy argument in this case in order to unify the calling
729 : !> sequence of the tool-routines.
730 : !>
731 : !> NPROCS (global input) INTEGER
732 : !> The total number processes over which the distributed
733 : !> matrix is distributed.
734 : !>
735 : !> \param struct ...
736 : !> \param INDXGLOB ...
737 : !> \return ...
738 : !> \author Mauro Del Ben [MDB] - 12.2012
739 : ! **************************************************************************************************
740 1457983 : FUNCTION cp_fm_indxg2l_row(struct, INDXGLOB) RESULT(G2L)
741 : CLASS(cp_fm_struct_type), INTENT(IN) :: struct
742 : INTEGER, INTENT(IN) :: INDXGLOB
743 : INTEGER :: G2L
744 :
745 : #if defined(__parallel)
746 : INTEGER :: number_of_process_rows
747 : INTEGER, EXTERNAL :: indxg2l
748 : #endif
749 :
750 : #if defined(__parallel)
751 :
752 1457983 : CALL struct%context%get(number_of_process_rows=number_of_process_rows)
753 :
754 1457983 : G2L = indxg2l(INDXGLOB, struct%nrow_block, 0, struct%first_p_pos(1), number_of_process_rows)
755 :
756 : #else
757 : MARK_USED(struct)
758 :
759 : G2L = INDXGLOB
760 :
761 : #endif
762 :
763 1457983 : END FUNCTION cp_fm_indxg2l_row
764 :
765 : ! **************************************************************************************************
766 : !> \brief wrapper to scalapack function INDXG2L that computes the local index
767 : !> of a distributed matrix entry pointed to by the global index INDXGLOB.
768 : !>
769 : !> Arguments
770 : !> =========
771 : !>
772 : !> INDXGLOB (global input) INTEGER
773 : !> The global index of the distributed matrix entry.
774 : !>
775 : !> NB (global input) INTEGER
776 : !> Block size, size of the blocks the distributed matrix is
777 : !> split into.
778 : !>
779 : !> IPROC (local dummy) INTEGER
780 : !> Dummy argument in this case in order to unify the calling
781 : !> sequence of the tool-routines.
782 : !>
783 : !> ISRCPROC (local dummy) INTEGER
784 : !> Dummy argument in this case in order to unify the calling
785 : !> sequence of the tool-routines.
786 : !>
787 : !> NPROCS (global input) INTEGER
788 : !> The total number processes over which the distributed
789 : !> matrix is distributed.
790 : !>
791 : !> \param struct ...
792 : !> \param INDXGLOB ...
793 : !> \return ...
794 : !> \author Mauro Del Ben [MDB] - 12.2012
795 : ! **************************************************************************************************
796 450305 : FUNCTION cp_fm_indxg2l_col(struct, INDXGLOB) RESULT(G2L)
797 : CLASS(cp_fm_struct_type), INTENT(IN) :: struct
798 : INTEGER, INTENT(IN) :: INDXGLOB
799 : INTEGER :: G2L
800 :
801 : #if defined(__parallel)
802 : INTEGER :: number_of_process_columns
803 : INTEGER, EXTERNAL :: indxg2l
804 : #endif
805 :
806 : #if defined(__parallel)
807 :
808 450305 : CALL struct%context%get(number_of_process_columns=number_of_process_columns)
809 :
810 450305 : G2L = indxg2l(INDXGLOB, struct%ncol_block, 0, struct%first_p_pos(2), number_of_process_columns)
811 :
812 : #else
813 : MARK_USED(struct)
814 :
815 : G2L = INDXGLOB
816 :
817 : #endif
818 :
819 450305 : END FUNCTION cp_fm_indxg2l_col
820 :
821 : ! **************************************************************************************************
822 : !> \brief wrapper to scalapack function INDXL2G that computes the global index
823 : !> of a distributed matrix entry pointed to by the local index INDXLOC
824 : !> of the process indicated by IPROC.
825 : !>
826 : !> Arguments
827 : !> =========
828 : !>
829 : !> INDXLOC (global input) INTEGER
830 : !> The local index of the distributed matrix entry.
831 : !>
832 : !> NB (global input) INTEGER
833 : !> Block size, size of the blocks the distributed matrix is
834 : !> split into.
835 : !>
836 : !> IPROC (local input) INTEGER
837 : !> The coordinate of the process whose local array row or
838 : !> column is to be determined.
839 : !>
840 : !> ISRCPROC (global input) INTEGER
841 : !> The coordinate of the process that possesses the first
842 : !> row/column of the distributed matrix.
843 : !>
844 : !> NPROCS (global input) INTEGER
845 : !> The total number processes over which the distributed
846 : !> matrix is distributed.
847 : !>
848 : !> \param struct ...
849 : !> \param INDXLOC ...
850 : !> \param IPROC ...
851 : !> \return ...
852 : !> \author Mauro Del Ben [MDB] - 12.2012
853 : ! **************************************************************************************************
854 5495005 : FUNCTION cp_fm_indxl2g_row(struct, INDXLOC, IPROC) RESULT(L2G)
855 : CLASS(cp_fm_struct_type), INTENT(IN) :: struct
856 : INTEGER, INTENT(IN) :: INDXLOC, IPROC
857 : INTEGER :: L2G
858 :
859 : #if defined(__parallel)
860 : INTEGER :: number_of_process_rows
861 : INTEGER, EXTERNAL :: indxl2g
862 :
863 5495005 : CALL struct%context%get(number_of_process_rows=number_of_process_rows)
864 :
865 5495005 : L2G = indxl2g(INDXLOC, struct%nrow_block, IPROC, struct%first_p_pos(1), number_of_process_rows)
866 :
867 : #else
868 : MARK_USED(struct)
869 : MARK_USED(indxloc)
870 : MARK_USED(iproc)
871 :
872 : L2G = INDXLOC
873 :
874 : #endif
875 :
876 5495005 : END FUNCTION cp_fm_indxl2g_row
877 :
878 : ! **************************************************************************************************
879 : !> \brief wrapper to scalapack function INDXL2G that computes the global index
880 : !> of a distributed matrix entry pointed to by the local index INDXLOC
881 : !> of the process indicated by IPROC.
882 : !>
883 : !> Arguments
884 : !> =========
885 : !>
886 : !> INDXLOC (global input) INTEGER
887 : !> The local index of the distributed matrix entry.
888 : !>
889 : !> NB (global input) INTEGER
890 : !> Block size, size of the blocks the distributed matrix is
891 : !> split into.
892 : !>
893 : !> IPROC (local input) INTEGER
894 : !> The coordinate of the process whose local array row or
895 : !> column is to be determined.
896 : !>
897 : !> ISRCPROC (global input) INTEGER
898 : !> The coordinate of the process that possesses the first
899 : !> row/column of the distributed matrix.
900 : !>
901 : !> NPROCS (global input) INTEGER
902 : !> The total number processes over which the distributed
903 : !> matrix is distributed.
904 : !>
905 : !> \param struct ...
906 : !> \param INDXLOC ...
907 : !> \param IPROC ...
908 : !> \return ...
909 : !> \author Mauro Del Ben [MDB] - 12.2012
910 : ! **************************************************************************************************
911 5798283 : FUNCTION cp_fm_indxl2g_col(struct, INDXLOC, IPROC) RESULT(L2G)
912 : CLASS(cp_fm_struct_type), INTENT(IN) :: struct
913 : INTEGER, INTENT(IN) :: INDXLOC, IPROC
914 : INTEGER :: L2G
915 :
916 : #if defined(__parallel)
917 : INTEGER :: number_of_process_columns
918 : INTEGER, EXTERNAL :: indxl2g
919 :
920 5798283 : CALL struct%context%get(number_of_process_columns=number_of_process_columns)
921 :
922 5798283 : L2G = indxl2g(INDXLOC, struct%ncol_block, IPROC, struct%first_p_pos(2), number_of_process_columns)
923 :
924 : #else
925 : MARK_USED(struct)
926 : MARK_USED(indxloc)
927 : MARK_USED(iproc)
928 :
929 : L2G = INDXLOC
930 :
931 : #endif
932 :
933 5798283 : END FUNCTION cp_fm_indxl2g_col
934 :
935 0 : END MODULE cp_fm_struct
|