Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief represent a full matrix distributed on many processors
10 : !> \par History
11 : !> 3) separated structure object, removed globenv, renamed to full matrix
12 : !> many changes (fawzi 08.2002)
13 : !> \author Matthias Krack (22.05.2001)
14 : ! **************************************************************************************************
15 : MODULE cp_fm_types
16 : USE cp_blacs_env, ONLY: cp_blacs_env_type
17 : USE cp_blacs_types, ONLY: cp_blacs_type
18 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent,&
19 : cp_fm_struct_get,&
20 : cp_fm_struct_release,&
21 : cp_fm_struct_retain,&
22 : cp_fm_struct_type,&
23 : cp_fm_struct_write_info
24 : USE kinds, ONLY: dp,&
25 : sp
26 : USE message_passing, ONLY: cp2k_is_parallel,&
27 : mp_any_source,&
28 : mp_para_env_type,&
29 : mp_proc_null,&
30 : mp_request_null,&
31 : mp_request_type,&
32 : mp_waitall
33 : USE parallel_rng_types, ONLY: UNIFORM,&
34 : rng_stream_type
35 : #include "../base/base_uses.f90"
36 :
37 : IMPLICIT NONE
38 :
39 : PRIVATE
40 :
41 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
42 : LOGICAL, PARAMETER :: debug_this_module = .TRUE.
43 : INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
44 :
45 : INTEGER, PRIVATE :: cp_fm_mm_type = 1
46 :
47 : PUBLIC :: cp_fm_type, &
48 : cp_fm_p_type, copy_info_type
49 :
50 : PUBLIC :: cp_fm_add_to_element, &
51 : cp_fm_create, &
52 : cp_fm_release, &
53 : cp_fm_get_info, &
54 : cp_fm_set_element, &
55 : cp_fm_get_element, &
56 : cp_fm_get_diag, & ! get diagonal
57 : cp_fm_set_all, & ! set all elements and diagonal
58 : cp_fm_set_submatrix, & ! set a submatrix to given values
59 : cp_fm_get_submatrix, & ! get a submatrix of given values
60 : cp_fm_init_random, &
61 : cp_fm_maxabsval, & ! find the maximum absolute value
62 : cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
63 : cp_fm_to_fm, & ! copy (parts of) a fm to a fm
64 : cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
65 : cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
66 : cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
67 : cp_fm_to_fm_triangular, &
68 : cp_fm_copy_general, &
69 : cp_fm_start_copy_general, &
70 : cp_fm_finish_copy_general, &
71 : cp_fm_cleanup_copy_general, &
72 : cp_fm_write_unformatted, & ! writes a full matrix to an open unit
73 : cp_fm_write_formatted, & ! writes a full matrix to an open unit
74 : cp_fm_read_unformatted, & ! reads a full matrix from an open unit
75 : cp_fm_setup, & ! allows to set flags for fms
76 : cp_fm_get_mm_type, &
77 : cp_fm_write_info, &
78 : cp_fm_to_fm_submat_general ! copy matrix across different contexts
79 :
80 : PUBLIC :: cp_fm_pilaenv
81 :
82 : INTERFACE cp_fm_to_fm
83 : MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
84 : cp_fm_to_fm_columns ! just a number of columns
85 : END INTERFACE
86 :
87 : INTERFACE cp_fm_release
88 : MODULE PROCEDURE cp_fm_release_aa0, &
89 : cp_fm_release_aa1, &
90 : cp_fm_release_aa2, &
91 : cp_fm_release_aa3, &
92 : cp_fm_release_ap1, &
93 : cp_fm_release_ap2, &
94 : cp_fm_release_pa1, &
95 : cp_fm_release_pa2, &
96 : cp_fm_release_pa3, &
97 : cp_fm_release_pp1, &
98 : cp_fm_release_pp2
99 : END INTERFACE
100 :
101 : ! **************************************************************************************************
102 : !> \brief represent a full matrix
103 : !> \param name the name of the matrix, used for printing
104 : !> \param matrix_struct structure of this matrix
105 : !> \param local_data array with the data of the matrix (its contents
106 : !> depend on the matrix type used: in parallel runs it will be
107 : !> in scalapack format, in sequential, it will simply contain
108 : !> the matrix)
109 : !> \par History
110 : !> 08.2002 created [fawzi]
111 : !> \author fawzi
112 : ! **************************************************************************************************
113 : TYPE cp_fm_type
114 : ! PRIVATE
115 : CHARACTER(LEN=60) :: name = ""
116 : LOGICAL :: use_sp = .FALSE.
117 : TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
118 : REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
119 : REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => NULL()
120 : END TYPE cp_fm_type
121 :
122 : ! **************************************************************************************************
123 : !> \brief just to build arrays of pointers to matrices
124 : !> \param matrix the pointer to the matrix
125 : !> \par History
126 : !> 08.2002 created [fawzi]
127 : !> \author fawzi
128 : ! **************************************************************************************************
129 : TYPE cp_fm_p_type
130 : TYPE(cp_fm_type), POINTER :: matrix => NULL()
131 : END TYPE cp_fm_p_type
132 :
133 : ! **************************************************************************************************
134 : !> \brief Stores the state of a copy between cp_fm_start_copy_general
135 : !> and cp_fm_finish_copy_general
136 : !> \par History
137 : !> Jan 2017 [Mark T]
138 : ! **************************************************************************************************
139 : TYPE copy_info_type
140 : INTEGER :: send_size = -1
141 : INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
142 : TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
143 : INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
144 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices => NULL(), recv_row_indices => NULL()
145 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
146 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
147 : END TYPE copy_info_type
148 :
149 : CONTAINS
150 :
151 : ! **************************************************************************************************
152 : !> \brief creates a new full matrix with the given structure
153 : !> \param matrix the matrix to be created
154 : !> \param matrix_struct the structure of matrix
155 : !> \param name ...
156 : !> \param use_sp ...
157 : !> \par History
158 : !> 08.2002 created [fawzi]
159 : !> \author Fawzi Mohamed
160 : !> \note
161 : !> preferred allocation routine
162 : ! **************************************************************************************************
163 1275188 : SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
164 : TYPE(cp_fm_type), INTENT(OUT) :: matrix
165 : TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
166 : CHARACTER(LEN=*), INTENT(in), OPTIONAL :: name
167 : LOGICAL, INTENT(in), OPTIONAL :: use_sp
168 :
169 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_create'
170 :
171 : INTEGER :: handle, ncol_local, npcol, nprow, &
172 : nrow_local
173 : TYPE(cp_blacs_env_type), POINTER :: context
174 :
175 1275188 : CALL timeset(routineN, handle)
176 :
177 1275188 : context => matrix_struct%context
178 1275188 : matrix%matrix_struct => matrix_struct
179 1275188 : CALL cp_fm_struct_retain(matrix%matrix_struct)
180 :
181 1275188 : matrix%use_sp = .FALSE.
182 1275188 : IF (PRESENT(use_sp)) matrix%use_sp = use_sp
183 :
184 1275188 : nprow = context%num_pe(1)
185 1275188 : npcol = context%num_pe(2)
186 1275188 : NULLIFY (matrix%local_data)
187 1275188 : NULLIFY (matrix%local_data_sp)
188 :
189 : ! OK, we allocate here at least a 1 x 1 matrix
190 : ! this must (and is) compatible with the descinit call
191 : ! in cp_fm_struct
192 1275188 : nrow_local = matrix_struct%local_leading_dimension
193 1275188 : ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
194 1275188 : IF (matrix%use_sp) THEN
195 0 : ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
196 : ELSE
197 5100752 : ALLOCATE (matrix%local_data(nrow_local, ncol_local))
198 : END IF
199 :
200 : ! JVDV we should remove this, as it is up to the user to zero afterwards
201 1275188 : IF (matrix%use_sp) THEN
202 0 : matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
203 : ELSE
204 615356432 : matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
205 : END IF
206 :
207 1275188 : IF (PRESENT(name)) THEN
208 630087 : matrix%name = name
209 : ELSE
210 645101 : matrix%name = 'full matrix'
211 : END IF
212 :
213 1275188 : CALL timestop(handle)
214 :
215 1275188 : END SUBROUTINE cp_fm_create
216 :
217 : ! **************************************************************************************************
218 : !> \brief releases a full matrix
219 : !> \param matrix the matrix to release
220 : !> \par History
221 : !> 08.2002 created [fawzi]
222 : !> \author Fawzi Mohamed
223 : ! **************************************************************************************************
224 1287100 : SUBROUTINE cp_fm_release_aa0(matrix)
225 : TYPE(cp_fm_type), INTENT(INOUT) :: matrix
226 :
227 1287100 : IF (ASSOCIATED(matrix%local_data)) THEN
228 1274488 : DEALLOCATE (matrix%local_data)
229 : NULLIFY (matrix%local_data)
230 : END IF
231 1287100 : IF (ASSOCIATED(matrix%local_data_sp)) THEN
232 0 : DEALLOCATE (matrix%local_data_sp)
233 : NULLIFY (matrix%local_data_sp)
234 : END IF
235 1287100 : matrix%name = ""
236 1287100 : CALL cp_fm_struct_release(matrix%matrix_struct)
237 :
238 1287100 : END SUBROUTINE cp_fm_release_aa0
239 :
240 : ! **************************************************************************************************
241 : !> \brief ...
242 : !> \param matrices ...
243 : ! **************************************************************************************************
244 71770 : SUBROUTINE cp_fm_release_aa1(matrices)
245 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
246 :
247 : INTEGER :: i
248 :
249 71770 : IF (ALLOCATED(matrices)) THEN
250 194571 : DO i = 1, SIZE(matrices)
251 194571 : CALL cp_fm_release(matrices(i))
252 : END DO
253 70682 : DEALLOCATE (matrices)
254 : END IF
255 71770 : END SUBROUTINE cp_fm_release_aa1
256 :
257 : ! **************************************************************************************************
258 : !> \brief ...
259 : !> \param matrices ...
260 : ! **************************************************************************************************
261 7864 : SUBROUTINE cp_fm_release_aa2(matrices)
262 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
263 :
264 : INTEGER :: i, j
265 :
266 7864 : IF (ALLOCATED(matrices)) THEN
267 19016 : DO i = 1, SIZE(matrices, 1)
268 51362 : DO j = 1, SIZE(matrices, 2)
269 45774 : CALL cp_fm_release(matrices(i, j))
270 : END DO
271 : END DO
272 5588 : DEALLOCATE (matrices)
273 : END IF
274 7864 : END SUBROUTINE cp_fm_release_aa2
275 :
276 : ! **************************************************************************************************
277 : !> \brief ...
278 : !> \param matrices ...
279 : ! **************************************************************************************************
280 42 : SUBROUTINE cp_fm_release_aa3(matrices)
281 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
282 :
283 : INTEGER :: i, j, k
284 :
285 42 : IF (ALLOCATED(matrices)) THEN
286 454 : DO i = 1, SIZE(matrices, 1)
287 1306 : DO j = 1, SIZE(matrices, 2)
288 2276 : DO k = 1, SIZE(matrices, 3)
289 1864 : CALL cp_fm_release(matrices(i, j, k))
290 : END DO
291 : END DO
292 : END DO
293 42 : DEALLOCATE (matrices)
294 : END IF
295 42 : END SUBROUTINE cp_fm_release_aa3
296 :
297 : ! **************************************************************************************************
298 : !> \brief ...
299 : !> \param matrices ...
300 : ! **************************************************************************************************
301 149932 : SUBROUTINE cp_fm_release_pa1(matrices)
302 : TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
303 :
304 : INTEGER :: i
305 :
306 149932 : IF (ASSOCIATED(matrices)) THEN
307 119347 : DO i = 1, SIZE(matrices)
308 119347 : CALL cp_fm_release(matrices(i))
309 : END DO
310 51188 : DEALLOCATE (matrices)
311 : NULLIFY (matrices)
312 : END IF
313 149932 : END SUBROUTINE cp_fm_release_pa1
314 :
315 : ! **************************************************************************************************
316 : !> \brief ...
317 : !> \param matrices ...
318 : ! **************************************************************************************************
319 21264 : SUBROUTINE cp_fm_release_pa2(matrices)
320 : TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
321 :
322 : INTEGER :: i, j
323 :
324 21264 : IF (ASSOCIATED(matrices)) THEN
325 44666 : DO i = 1, SIZE(matrices, 1)
326 87778 : DO j = 1, SIZE(matrices, 2)
327 76750 : CALL cp_fm_release(matrices(i, j))
328 : END DO
329 : END DO
330 11028 : DEALLOCATE (matrices)
331 : NULLIFY (matrices)
332 : END IF
333 21264 : END SUBROUTINE cp_fm_release_pa2
334 :
335 : ! **************************************************************************************************
336 : !> \brief ...
337 : !> \param matrices ...
338 : ! **************************************************************************************************
339 0 : SUBROUTINE cp_fm_release_pa3(matrices)
340 : TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
341 :
342 : INTEGER :: i, j, k
343 :
344 0 : IF (ASSOCIATED(matrices)) THEN
345 0 : DO i = 1, SIZE(matrices, 1)
346 0 : DO j = 1, SIZE(matrices, 2)
347 0 : DO k = 1, SIZE(matrices, 3)
348 0 : CALL cp_fm_release(matrices(i, j, k))
349 : END DO
350 : END DO
351 : END DO
352 0 : DEALLOCATE (matrices)
353 : NULLIFY (matrices)
354 : END IF
355 0 : END SUBROUTINE cp_fm_release_pa3
356 :
357 : ! **************************************************************************************************
358 : !> \brief ...
359 : !> \param matrices ...
360 : ! **************************************************************************************************
361 0 : SUBROUTINE cp_fm_release_ap1(matrices)
362 : TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
363 :
364 : INTEGER :: i
365 :
366 0 : IF (ALLOCATED(matrices)) THEN
367 0 : DO i = 1, SIZE(matrices)
368 0 : CALL cp_fm_release(matrices(i)%matrix)
369 0 : DEALLOCATE (matrices(i)%matrix)
370 : END DO
371 0 : DEALLOCATE (matrices)
372 : END IF
373 0 : END SUBROUTINE cp_fm_release_ap1
374 :
375 : ! **************************************************************************************************
376 : !> \brief ...
377 : !> \param matrices ...
378 : ! **************************************************************************************************
379 0 : SUBROUTINE cp_fm_release_ap2(matrices)
380 : TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
381 :
382 : INTEGER :: i, j
383 :
384 0 : IF (ALLOCATED(matrices)) THEN
385 0 : DO i = 1, SIZE(matrices, 1)
386 0 : DO j = 1, SIZE(matrices, 2)
387 0 : CALL cp_fm_release(matrices(i, j)%matrix)
388 0 : DEALLOCATE (matrices(i, j)%matrix)
389 : END DO
390 : END DO
391 0 : DEALLOCATE (matrices)
392 : END IF
393 0 : END SUBROUTINE cp_fm_release_ap2
394 :
395 : ! **************************************************************************************************
396 : !> \brief ...
397 : !> \param matrices ...
398 : ! **************************************************************************************************
399 0 : SUBROUTINE cp_fm_release_pp1(matrices)
400 : TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
401 :
402 : INTEGER :: i
403 :
404 0 : IF (ASSOCIATED(matrices)) THEN
405 0 : DO i = 1, SIZE(matrices)
406 0 : CALL cp_fm_release(matrices(i)%matrix)
407 0 : DEALLOCATE (matrices(i)%matrix)
408 : END DO
409 0 : DEALLOCATE (matrices)
410 : NULLIFY (matrices)
411 : END IF
412 0 : END SUBROUTINE cp_fm_release_pp1
413 :
414 : ! **************************************************************************************************
415 : !> \brief ...
416 : !> \param matrices ...
417 : ! **************************************************************************************************
418 0 : SUBROUTINE cp_fm_release_pp2(matrices)
419 : TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
420 :
421 : INTEGER :: i, j
422 :
423 0 : IF (ASSOCIATED(matrices)) THEN
424 0 : DO i = 1, SIZE(matrices, 1)
425 0 : DO j = 1, SIZE(matrices, 2)
426 0 : CALL cp_fm_release(matrices(i, j)%matrix)
427 0 : DEALLOCATE (matrices(i, j)%matrix)
428 : END DO
429 : END DO
430 0 : DEALLOCATE (matrices)
431 : NULLIFY (matrices)
432 : END IF
433 0 : END SUBROUTINE cp_fm_release_pp2
434 :
435 : ! **************************************************************************************************
436 : !> \brief fills a matrix with random numbers
437 : !> \param matrix : to be initialized
438 : !> \param ncol : numbers of cols to fill
439 : !> \param start_col : starting at coll number
440 : !> \author Joost VandeVondele
441 : !> \note
442 : !> the value of a_ij is independent of the number of cpus
443 : ! **************************************************************************************************
444 2622 : SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
445 : TYPE(cp_fm_type), INTENT(IN) :: matrix
446 : INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
447 :
448 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_init_random'
449 :
450 : INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
451 : ncol_local, nrow_global, nrow_local
452 5244 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
453 2622 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: buff
454 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
455 2622 : POINTER :: local_data
456 : REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
457 : seed = RESHAPE((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
458 : TYPE(rng_stream_type) :: rng
459 :
460 2622 : CALL timeset(routineN, handle)
461 :
462 : ! guarantee same seed over all tasks
463 2622 : CALL matrix%matrix_struct%para_env%bcast(seed, 0)
464 :
465 : rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
466 2622 : extended_precision=.TRUE., seed=seed)
467 :
468 2622 : CPASSERT(.NOT. matrix%use_sp)
469 :
470 : CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
471 : nrow_local=nrow_local, ncol_local=ncol_local, &
472 : local_data=local_data, &
473 2622 : row_indices=row_indices, col_indices=col_indices)
474 :
475 2622 : my_start_col = 1
476 2622 : IF (PRESENT(start_col)) my_start_col = start_col
477 2622 : my_ncol = matrix%matrix_struct%ncol_global
478 2622 : IF (PRESENT(ncol)) my_ncol = ncol
479 :
480 2622 : IF (ncol_global < (my_start_col + my_ncol - 1)) &
481 0 : CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
482 :
483 7866 : ALLOCATE (buff(nrow_global))
484 :
485 : ! each global row has its own substream, in order to reach the stream for the local col,
486 : ! we just reset to the next substream
487 : ! following this, we fill the full buff with random numbers, and pick those we need
488 2622 : icol_global = 0
489 21429 : DO icol_local = 1, ncol_local
490 18807 : CPASSERT(col_indices(icol_local) > icol_global)
491 : DO
492 18807 : CALL rng%reset_to_next_substream()
493 18807 : icol_global = icol_global + 1
494 18807 : IF (icol_global == col_indices(icol_local)) EXIT
495 : END DO
496 18807 : CALL rng%fill(buff)
497 785427 : DO irow_local = 1, nrow_local
498 782805 : local_data(irow_local, icol_local) = buff(row_indices(irow_local))
499 : END DO
500 : END DO
501 :
502 2622 : DEALLOCATE (buff)
503 :
504 : ! store seed before deletion (unclear if this is the proper seed)
505 :
506 : ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
507 : ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
508 : ! stream of random numbers. While this seems odd and contrary to the original design,
509 : ! it was probably introduced to improve reproducibility.
510 : ! See also https://github.com/cp2k/cp2k/pull/506
511 2622 : CALL rng%get(ig=seed)
512 :
513 2622 : CALL timestop(handle)
514 :
515 73416 : END SUBROUTINE cp_fm_init_random
516 :
517 : ! **************************************************************************************************
518 : !> \brief set all elements of a matrix to the same value,
519 : !> and optionally the diagonal to a different one
520 : !> \param matrix input matrix
521 : !> \param alpha scalar used to set all elements of the matrix
522 : !> \param beta scalar used to set diagonal of the matrix
523 : !> \note
524 : !> can be used to zero a matrix
525 : !> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
526 : ! **************************************************************************************************
527 334243 : SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
528 :
529 : TYPE(cp_fm_type), INTENT(IN) :: matrix
530 : REAL(KIND=dp), INTENT(IN) :: alpha
531 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: beta
532 :
533 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_set_all'
534 :
535 : INTEGER :: handle, i, n
536 :
537 334243 : CALL timeset(routineN, handle)
538 :
539 334243 : IF (matrix%use_sp) THEN
540 0 : matrix%local_data_sp(:, :) = REAL(alpha, sp)
541 : ELSE
542 206631218 : matrix%local_data(:, :) = alpha
543 : END IF
544 :
545 334243 : IF (PRESENT(beta)) THEN
546 55192 : CPASSERT(.NOT. matrix%use_sp)
547 55192 : n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
548 513760 : DO i = 1, n
549 513760 : CALL cp_fm_set_element(matrix, i, i, beta)
550 : END DO
551 : END IF
552 :
553 334243 : CALL timestop(handle)
554 :
555 334243 : END SUBROUTINE cp_fm_set_all
556 :
557 : ! **************************************************************************************************
558 : !> \brief returns the diagonal elements of a fm
559 : !> \param matrix ...
560 : !> \param diag ...
561 : ! **************************************************************************************************
562 16520 : SUBROUTINE cp_fm_get_diag(matrix, diag)
563 :
564 : ! arguments
565 : TYPE(cp_fm_type), INTENT(IN) :: matrix
566 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
567 :
568 : ! locals
569 : INTEGER :: i, nrow_global
570 :
571 : #if defined(__parallel)
572 : INTEGER, DIMENSION(9) :: desca
573 : TYPE(cp_blacs_env_type), POINTER :: context
574 : INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
575 : nprow
576 16520 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
577 16520 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
578 : #endif
579 :
580 16520 : CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
581 :
582 : #if defined(__parallel)
583 202226 : diag = 0.0_dp
584 16520 : context => matrix%matrix_struct%context
585 16520 : myprow = context%mepos(1)
586 16520 : mypcol = context%mepos(2)
587 16520 : nprow = context%num_pe(1)
588 16520 : npcol = context%num_pe(2)
589 :
590 16520 : a => matrix%local_data
591 16520 : a_sp => matrix%local_data_sp
592 165200 : desca(:) = matrix%matrix_struct%descriptor(:)
593 :
594 202226 : DO i = 1, nrow_global
595 : CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
596 185706 : irow_local, icol_local, iprow, ipcol)
597 202226 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
598 92937 : IF (matrix%use_sp) THEN
599 0 : diag(i) = REAL(a_sp(irow_local, icol_local), dp)
600 : ELSE
601 92937 : diag(i) = a(irow_local, icol_local)
602 : END IF
603 : END IF
604 : END DO
605 : #else
606 : DO i = 1, nrow_global
607 : IF (matrix%use_sp) THEN
608 : diag(i) = REAL(matrix%local_data_sp(i, i), dp)
609 : ELSE
610 : diag(i) = matrix%local_data(i, i)
611 : END IF
612 : END DO
613 : #endif
614 387932 : CALL matrix%matrix_struct%para_env%sum(diag)
615 :
616 16520 : END SUBROUTINE cp_fm_get_diag
617 :
618 : ! **************************************************************************************************
619 : !> \brief returns an element of a fm
620 : !> this value is valid on every cpu
621 : !> using this call is expensive
622 : !> \param matrix the matrix to read
623 : !> \param irow_global the row
624 : !> \param icol_global the col
625 : !> \param alpha the value of matrix(irow_global, icol_global)
626 : !> \param local true if the element is on this cpu, false otherwise
627 : !> \note
628 : !> - modified semantics. now this function always returns the value
629 : !> previously the value was zero on cpus that didn't own the relevant
630 : !> part of the matrix (Joost VandeVondele, May 2003)
631 : !> - usage of the function should be avoided, as it is likely to rather slow
632 : !> using row_indices/col_indices/local_data + some smart scheme normally
633 : !> yields a real parallel code
634 : ! **************************************************************************************************
635 2465852 : SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
636 :
637 : ! arguments
638 : TYPE(cp_fm_type), INTENT(IN) :: matrix
639 : REAL(KIND=dp), INTENT(OUT) :: alpha
640 : INTEGER, INTENT(IN) :: icol_global, &
641 : irow_global
642 : LOGICAL, INTENT(OUT), OPTIONAL :: local
643 :
644 : ! locals
645 : #if defined(__parallel)
646 : INTEGER, DIMENSION(9) :: desca
647 : TYPE(cp_blacs_env_type), POINTER :: context
648 : INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
649 : nprow
650 2465852 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
651 : #endif
652 :
653 : #if defined(__parallel)
654 2465852 : context => matrix%matrix_struct%context
655 2465852 : myprow = context%mepos(1)
656 2465852 : mypcol = context%mepos(2)
657 2465852 : nprow = context%num_pe(1)
658 2465852 : npcol = context%num_pe(2)
659 :
660 2465852 : a => matrix%local_data
661 24658520 : desca(:) = matrix%matrix_struct%descriptor(:)
662 :
663 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
664 2465852 : irow_local, icol_local, iprow, ipcol)
665 :
666 2465852 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
667 1232926 : alpha = a(irow_local, icol_local)
668 1232926 : CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
669 1232926 : IF (PRESENT(local)) local = .TRUE.
670 : ELSE
671 1232926 : CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
672 1232926 : IF (PRESENT(local)) local = .FALSE.
673 : END IF
674 :
675 : #else
676 : IF (PRESENT(local)) local = .TRUE.
677 : alpha = matrix%local_data(irow_global, icol_global)
678 : #endif
679 :
680 2465852 : END SUBROUTINE cp_fm_get_element
681 :
682 : ! **************************************************************************************************
683 : !> \brief sets an element of a matrix
684 : !> \param matrix ...
685 : !> \param irow_global ...
686 : !> \param icol_global ...
687 : !> \param alpha ...
688 : !> \note
689 : !> we expect all cpus to have the same arguments in the call to this function
690 : !> (otherwise one should use local_data tricks)
691 : ! **************************************************************************************************
692 862216 : SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
693 : TYPE(cp_fm_type), INTENT(IN) :: matrix
694 : INTEGER, INTENT(IN) :: irow_global, icol_global
695 : REAL(KIND=dp), INTENT(IN) :: alpha
696 :
697 : INTEGER :: mypcol, myprow, npcol, nprow
698 : TYPE(cp_blacs_env_type), POINTER :: context
699 : #if defined(__parallel)
700 : INTEGER :: icol_local, ipcol, iprow, &
701 : irow_local
702 : INTEGER, DIMENSION(9) :: desca
703 862216 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
704 : #endif
705 :
706 862216 : context => matrix%matrix_struct%context
707 862216 : myprow = context%mepos(1)
708 862216 : mypcol = context%mepos(2)
709 862216 : nprow = context%num_pe(1)
710 862216 : npcol = context%num_pe(2)
711 :
712 0 : CPASSERT(.NOT. matrix%use_sp)
713 :
714 : #if defined(__parallel)
715 :
716 862216 : a => matrix%local_data
717 :
718 8622160 : desca(:) = matrix%matrix_struct%descriptor(:)
719 :
720 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
721 862216 : irow_local, icol_local, iprow, ipcol)
722 :
723 862216 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
724 435263 : a(irow_local, icol_local) = alpha
725 : END IF
726 :
727 : #else
728 :
729 : matrix%local_data(irow_global, icol_global) = alpha
730 :
731 : #endif
732 862216 : END SUBROUTINE cp_fm_set_element
733 :
734 : ! **************************************************************************************************
735 : !> \brief sets a submatrix of a full matrix
736 : !> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
737 : !> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
738 : !> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
739 : !> \param fm the full to change
740 : !> \param new_values a replicated full matrix with the new values
741 : !> \param start_row the starting row of b_matrix (defaults to 1)
742 : !> \param start_col the starting col of b_matrix (defaults to 1)
743 : !> \param n_rows the number of row to change in b (defaults to
744 : !> size(op(new_values),1))
745 : !> \param n_cols the number of columns to change in b (defaults to
746 : !> size(op(new_values),2))
747 : !> \param alpha rescaling factor for the new values (defaults to 1.0)
748 : !> \param beta rescaling factor for the old values (defaults to 0.0)
749 : !> \param transpose if new_values should be transposed: if true
750 : !> op(new_values)=new_values^T, else op(new_values)=new_values
751 : !> (defaults to false)
752 : !> \par History
753 : !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
754 : !> \author Fawzi Mohamed
755 : !> \note
756 : !> optimized for full column updates and alpha=1.0, beta=0.0
757 : !> the new_values need to be valid on all cpus
758 : ! **************************************************************************************************
759 62609 : SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
760 : start_col, n_rows, n_cols, alpha, beta, transpose)
761 : TYPE(cp_fm_type), INTENT(IN) :: fm
762 : REAL(KIND=dp), DIMENSION(:, :), INTENT(in) :: new_values
763 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
764 : REAL(KIND=dp), INTENT(in), OPTIONAL :: alpha, beta
765 : LOGICAL, INTENT(in), OPTIONAL :: transpose
766 :
767 : INTEGER :: i, i0, j, j0, ncol, ncol_block, &
768 : ncol_global, ncol_local, nrow, &
769 : nrow_block, nrow_global, nrow_local, &
770 : this_col, this_row
771 62609 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
772 : LOGICAL :: tr_a
773 : REAL(KIND=dp) :: al, be
774 62609 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: full_block
775 :
776 62609 : al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
777 : ! can be called too many times, making it a bit useless
778 : ! CALL timeset(routineN//','//moduleN,handle)
779 :
780 0 : CPASSERT(.NOT. fm%use_sp)
781 :
782 62609 : IF (PRESENT(alpha)) al = alpha
783 62609 : IF (PRESENT(beta)) be = beta
784 62609 : IF (PRESENT(start_row)) i0 = start_row
785 62609 : IF (PRESENT(start_col)) j0 = start_col
786 62609 : IF (PRESENT(transpose)) tr_a = transpose
787 15683 : IF (tr_a) THEN
788 15591 : nrow = SIZE(new_values, 2)
789 15591 : ncol = SIZE(new_values, 1)
790 : ELSE
791 47018 : nrow = SIZE(new_values, 1)
792 47018 : ncol = SIZE(new_values, 2)
793 : END IF
794 62609 : IF (PRESENT(n_rows)) nrow = n_rows
795 62609 : IF (PRESENT(n_cols)) ncol = n_cols
796 :
797 62609 : full_block => fm%local_data
798 :
799 : CALL cp_fm_get_info(matrix=fm, &
800 : nrow_global=nrow_global, ncol_global=ncol_global, &
801 : nrow_block=nrow_block, ncol_block=ncol_block, &
802 : nrow_local=nrow_local, ncol_local=ncol_local, &
803 62609 : row_indices=row_indices, col_indices=col_indices)
804 :
805 62609 : IF (al == 1.0 .AND. be == 0.0) THEN
806 1302958 : DO j = 1, ncol_local
807 1249487 : this_col = col_indices(j) - j0 + 1
808 1302958 : IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
809 285169 : IF (tr_a) THEN
810 6453 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
811 157620 : DO i = 1, nrow_local
812 157620 : full_block(i, j) = new_values(this_col, row_indices(i))
813 : END DO
814 : ELSE
815 594 : DO i = 1, nrow_local
816 510 : this_row = row_indices(i) - i0 + 1
817 594 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
818 255 : full_block(i, j) = new_values(this_col, this_row)
819 : END IF
820 : END DO
821 : END IF
822 : ELSE
823 278716 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
824 8919342 : DO i = 1, nrow_local
825 8919342 : full_block(i, j) = new_values(row_indices(i), this_col)
826 : END DO
827 : ELSE
828 4347 : DO i = 1, nrow_local
829 3745 : this_row = row_indices(i) - i0 + 1
830 4347 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
831 301 : full_block(i, j) = new_values(this_row, this_col)
832 : END IF
833 : END DO
834 : END IF
835 : END IF
836 : END IF
837 : END DO
838 : ELSE
839 831608 : DO j = 1, ncol_local
840 822470 : this_col = col_indices(j) - j0 + 1
841 831608 : IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
842 822470 : IF (tr_a) THEN
843 88223375 : DO i = 1, nrow_local
844 87400905 : this_row = row_indices(i) - i0 + 1
845 88223375 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
846 : full_block(i, j) = al*new_values(this_col, this_row) + &
847 411235 : be*full_block(i, j)
848 : END IF
849 : END DO
850 : ELSE
851 0 : DO i = 1, nrow_local
852 0 : this_row = row_indices(i) - i0 + 1
853 0 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
854 : full_block(i, j) = al*new_values(this_row, this_col) + &
855 0 : be*full_block(i, j)
856 : END IF
857 : END DO
858 : END IF
859 : END IF
860 : END DO
861 : END IF
862 :
863 : ! CALL timestop(handle)
864 :
865 62609 : END SUBROUTINE cp_fm_set_submatrix
866 :
867 : ! **************************************************************************************************
868 : !> \brief gets a submatrix of a full matrix
869 : !> op(target_m)(1:n_rows,1:n_cols)
870 : !> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
871 : !> target_m is replicated on all cpus
872 : !> using this call is expensive
873 : !> \param fm the full you want to get the info from
874 : !> \param target_m a replicated full matrix that will contain the result
875 : !> \param start_row the starting row of b_matrix (defaults to 1)
876 : !> \param start_col the starting col of b_matrix (defaults to 1)
877 : !> \param n_rows the number of row to change in b (defaults to
878 : !> size(op(new_values),1))
879 : !> \param n_cols the number of columns to change in b (defaults to
880 : !> size(op(new_values),2))
881 : !> \param transpose if target_m should be transposed: if true
882 : !> op(target_m)=target_m^T, else op(target_m)=target_m
883 : !> (defaults to false)
884 : !> \par History
885 : !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
886 : !> \author Fawzi Mohamed
887 : !> \note
888 : !> optimized for full column updates. Zeros out a little too much
889 : !> of target_m
890 : !> the target_m is replicated and valid on all cpus
891 : ! **************************************************************************************************
892 73020 : SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
893 : start_col, n_rows, n_cols, transpose)
894 : TYPE(cp_fm_type), INTENT(IN) :: fm
895 : REAL(KIND=dp), DIMENSION(:, :), INTENT(out) :: target_m
896 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
897 : LOGICAL, INTENT(in), OPTIONAL :: transpose
898 :
899 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
900 :
901 : INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
902 : ncol_local, nrow, nrow_global, &
903 : nrow_local, this_col, this_row
904 73020 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
905 : LOGICAL :: tr_a
906 73020 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: full_block
907 : TYPE(mp_para_env_type), POINTER :: para_env
908 :
909 73020 : CALL timeset(routineN, handle)
910 :
911 73020 : i0 = 1; j0 = 1; tr_a = .FALSE.
912 :
913 73020 : CPASSERT(.NOT. fm%use_sp)
914 :
915 73020 : IF (PRESENT(start_row)) i0 = start_row
916 73020 : IF (PRESENT(start_col)) j0 = start_col
917 73020 : IF (PRESENT(transpose)) tr_a = transpose
918 6240 : IF (tr_a) THEN
919 2362 : nrow = SIZE(target_m, 2)
920 2362 : ncol = SIZE(target_m, 1)
921 : ELSE
922 70658 : nrow = SIZE(target_m, 1)
923 70658 : ncol = SIZE(target_m, 2)
924 : END IF
925 73020 : IF (PRESENT(n_rows)) nrow = n_rows
926 73020 : IF (PRESENT(n_cols)) ncol = n_cols
927 :
928 73020 : para_env => fm%matrix_struct%para_env
929 :
930 73020 : full_block => fm%local_data
931 : #if defined(__parallel)
932 : ! zero-out whole target_m
933 73020 : IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
934 97288 : CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
935 : END IF
936 : #endif
937 :
938 : CALL cp_fm_get_info(matrix=fm, &
939 : nrow_global=nrow_global, ncol_global=ncol_global, &
940 : nrow_local=nrow_local, ncol_local=ncol_local, &
941 73020 : row_indices=row_indices, col_indices=col_indices)
942 :
943 477354 : DO j = 1, ncol_local
944 404334 : this_col = col_indices(j) - j0 + 1
945 477354 : IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
946 274010 : IF (tr_a) THEN
947 2362 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
948 81863 : DO i = 1, nrow_local
949 81863 : target_m(this_col, row_indices(i)) = full_block(i, j)
950 : END DO
951 : ELSE
952 0 : DO i = 1, nrow_local
953 0 : this_row = row_indices(i) - i0 + 1
954 0 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
955 0 : target_m(this_col, this_row) = full_block(i, j)
956 : END IF
957 : END DO
958 : END IF
959 : ELSE
960 271648 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
961 6029368 : DO i = 1, nrow_local
962 6029368 : target_m(row_indices(i), this_col) = full_block(i, j)
963 : END DO
964 : ELSE
965 1168114 : DO i = 1, nrow_local
966 1141698 : this_row = row_indices(i) - i0 + 1
967 1168114 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
968 24462 : target_m(this_row, this_col) = full_block(i, j)
969 : END IF
970 : END DO
971 : END IF
972 : END IF
973 : END IF
974 : END DO
975 :
976 15985988 : CALL para_env%sum(target_m)
977 :
978 73020 : CALL timestop(handle)
979 :
980 73020 : END SUBROUTINE cp_fm_get_submatrix
981 :
982 : ! **************************************************************************************************
983 : !> \brief returns all kind of information about the full matrix
984 : !> \param matrix ...
985 : !> \param name ...
986 : !> \param nrow_global ...
987 : !> \param ncol_global ...
988 : !> \param nrow_block ...
989 : !> \param ncol_block ...
990 : !> \param nrow_local ...
991 : !> \param ncol_local ...
992 : !> \param row_indices ...
993 : !> \param col_indices ...
994 : !> \param local_data ...
995 : !> \param context ...
996 : !> \param nrow_locals ...
997 : !> \param ncol_locals ...
998 : !> \param matrix_struct ...
999 : !> \param para_env ...
1000 : !> \note
1001 : !> see also cp_fm_struct for explanation
1002 : !> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1003 : !> access to the local blacs block
1004 : ! **************************************************************************************************
1005 5410849 : SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1006 : nrow_block, ncol_block, nrow_local, ncol_local, &
1007 : row_indices, col_indices, local_data, context, &
1008 : nrow_locals, ncol_locals, matrix_struct, para_env)
1009 :
1010 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1011 : CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1012 : INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1013 : ncol_block, nrow_local, ncol_local
1014 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1015 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1016 : OPTIONAL, POINTER :: local_data
1017 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1018 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1019 : TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1020 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1021 :
1022 8 : IF (PRESENT(name)) name = matrix%name
1023 5410849 : IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1024 5410849 : IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1025 :
1026 : CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1027 : ncol_local=ncol_local, nrow_global=nrow_global, &
1028 : ncol_global=ncol_global, nrow_block=nrow_block, &
1029 : ncol_block=ncol_block, row_indices=row_indices, &
1030 : col_indices=col_indices, nrow_locals=nrow_locals, &
1031 5410849 : ncol_locals=ncol_locals, context=context, para_env=para_env)
1032 :
1033 5410849 : END SUBROUTINE cp_fm_get_info
1034 :
1035 : ! **************************************************************************************************
1036 : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1037 : !> \param matrix a cp_fm_type instance
1038 : !> \param io_unit the I/O unit to use for writing
1039 : ! **************************************************************************************************
1040 3 : SUBROUTINE cp_fm_write_info(matrix, io_unit)
1041 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1042 : INTEGER, INTENT(IN) :: io_unit
1043 :
1044 3 : WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1045 3 : CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1046 3 : END SUBROUTINE cp_fm_write_info
1047 :
1048 : ! **************************************************************************************************
1049 : !> \brief find the maximum absolute value of the matrix element
1050 : !> maxval(abs(matrix))
1051 : !> \param matrix ...
1052 : !> \param a_max ...
1053 : !> \param ir_max ...
1054 : !> \param ic_max ...
1055 : ! **************************************************************************************************
1056 91623 : SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1057 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1058 : REAL(KIND=dp), INTENT(OUT) :: a_max
1059 : INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1060 :
1061 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsval'
1062 :
1063 : INTEGER :: handle, i, ic_max_local, ir_max_local, &
1064 : j, mepos, ncol_local, nrow_local, &
1065 : num_pe
1066 91623 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1067 91623 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1068 : REAL(dp) :: my_max
1069 91623 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1070 91623 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1071 91623 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: my_block_sp
1072 :
1073 91623 : CALL timeset(routineN, handle)
1074 :
1075 91623 : my_block => matrix%local_data
1076 91623 : my_block_sp => matrix%local_data_sp
1077 :
1078 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1079 91623 : row_indices=row_indices, col_indices=col_indices)
1080 :
1081 91623 : IF (matrix%use_sp) THEN
1082 0 : a_max = REAL(MAXVAL(ABS(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
1083 : ELSE
1084 54588539 : a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
1085 : END IF
1086 :
1087 91623 : IF (PRESENT(ir_max)) THEN
1088 0 : num_pe = matrix%matrix_struct%para_env%num_pe
1089 0 : mepos = matrix%matrix_struct%para_env%mepos
1090 0 : ALLOCATE (ir_max_vec(0:num_pe - 1))
1091 0 : ir_max_vec(0:num_pe - 1) = 0
1092 0 : ALLOCATE (ic_max_vec(0:num_pe - 1))
1093 0 : ic_max_vec(0:num_pe - 1) = 0
1094 0 : ALLOCATE (a_max_vec(0:num_pe - 1))
1095 0 : a_max_vec(0:num_pe - 1) = 0.0_dp
1096 0 : my_max = 0.0_dp
1097 :
1098 0 : IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1099 0 : DO i = 1, ncol_local
1100 0 : DO j = 1, nrow_local
1101 0 : IF (matrix%use_sp) THEN
1102 0 : IF (ABS(my_block_sp(j, i)) > my_max) THEN
1103 0 : my_max = REAL(my_block_sp(j, i), dp)
1104 0 : ir_max_local = j
1105 0 : ic_max_local = i
1106 : END IF
1107 : ELSE
1108 0 : IF (ABS(my_block(j, i)) > my_max) THEN
1109 0 : my_max = my_block(j, i)
1110 0 : ir_max_local = j
1111 0 : ic_max_local = i
1112 : END IF
1113 : END IF
1114 : END DO
1115 : END DO
1116 :
1117 0 : a_max_vec(mepos) = my_max
1118 0 : ir_max_vec(mepos) = row_indices(ir_max_local)
1119 0 : ic_max_vec(mepos) = col_indices(ic_max_local)
1120 :
1121 : END IF
1122 :
1123 0 : CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1124 0 : CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1125 0 : CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1126 :
1127 0 : my_max = 0.0_dp
1128 0 : DO i = 0, num_pe - 1
1129 0 : IF (a_max_vec(i) > my_max) THEN
1130 0 : ir_max = ir_max_vec(i)
1131 0 : ic_max = ic_max_vec(i)
1132 : END IF
1133 : END DO
1134 :
1135 0 : DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1136 0 : CPASSERT(ic_max > 0)
1137 0 : CPASSERT(ir_max > 0)
1138 :
1139 : END IF
1140 :
1141 91623 : CALL matrix%matrix_struct%para_env%max(a_max)
1142 :
1143 91623 : CALL timestop(handle)
1144 :
1145 183246 : END SUBROUTINE cp_fm_maxabsval
1146 :
1147 : ! **************************************************************************************************
1148 : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1149 : !> = || A ||_infinity
1150 : !> \param matrix ...
1151 : !> \param a_max ...
1152 : !> \note
1153 : !> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
1154 : !> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1155 : !> http://mathworld.wolfram.com/MatrixNorm.html
1156 : !> (but the bound is not so tight in the general case)
1157 : ! **************************************************************************************************
1158 4244 : SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1159 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1160 : REAL(KIND=dp), INTENT(OUT) :: a_max
1161 :
1162 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
1163 :
1164 : INTEGER :: handle, i, j, ncol_local, nrow_global, &
1165 : nrow_local
1166 4244 : INTEGER, DIMENSION(:), POINTER :: row_indices
1167 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: values
1168 4244 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1169 :
1170 4244 : CALL timeset(routineN, handle)
1171 :
1172 4244 : my_block => matrix%local_data
1173 :
1174 4244 : CPASSERT(.NOT. matrix%use_sp)
1175 :
1176 : CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1177 4244 : nrow_local=nrow_local, ncol_local=ncol_local)
1178 :
1179 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1180 12732 : ALLOCATE (values(nrow_global))
1181 62104 : values = 0.0_dp
1182 62104 : DO j = 1, ncol_local
1183 525404 : DO i = 1, nrow_local
1184 521160 : values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
1185 : END DO
1186 : END DO
1187 4244 : CALL matrix%matrix_struct%para_env%sum(values)
1188 66348 : a_max = MAXVAL(values)
1189 4244 : DEALLOCATE (values)
1190 :
1191 4244 : CALL timestop(handle)
1192 8488 : END SUBROUTINE
1193 :
1194 : ! **************************************************************************************************
1195 : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
1196 : !> \param matrix ...
1197 : !> \param norm_array ...
1198 : ! **************************************************************************************************
1199 1282 : SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1200 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1201 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1202 :
1203 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_vectorsnorm'
1204 :
1205 : INTEGER :: handle, i, j, ncol_global, ncol_local, &
1206 : nrow_local
1207 1282 : INTEGER, DIMENSION(:), POINTER :: col_indices
1208 1282 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1209 :
1210 1282 : CALL timeset(routineN, handle)
1211 :
1212 1282 : my_block => matrix%local_data
1213 :
1214 1282 : CPASSERT(.NOT. matrix%use_sp)
1215 :
1216 : CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1217 1282 : nrow_local=nrow_local, ncol_local=ncol_local)
1218 :
1219 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1220 29746 : norm_array = 0.0_dp
1221 29746 : DO j = 1, ncol_local
1222 1261016 : DO i = 1, nrow_local
1223 1259734 : norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1224 : END DO
1225 : END DO
1226 58210 : CALL matrix%matrix_struct%para_env%sum(norm_array)
1227 29746 : norm_array = SQRT(norm_array)
1228 :
1229 1282 : CALL timestop(handle)
1230 1282 : END SUBROUTINE
1231 :
1232 : ! **************************************************************************************************
1233 : !> \brief summing up all the elements along the matrix's i-th index
1234 : !> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1235 : !> or
1236 : !> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1237 : !> \param matrix an input matrix A
1238 : !> \param sum_array sums of elements in each column/row
1239 : !> \param dir ...
1240 : !> \note forked from cp_fm_vectorsnorm() to be used with
1241 : !> the maximum overlap method
1242 : !> added row variation
1243 : ! **************************************************************************************************
1244 7194 : SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1245 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1246 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1247 : CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1248 :
1249 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_vectorssum'
1250 :
1251 : INTEGER :: handle, i, j, ncol_local, nrow_local
1252 7194 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1253 : LOGICAL :: docol
1254 7194 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1255 :
1256 7194 : CALL timeset(routineN, handle)
1257 :
1258 7194 : IF (PRESENT(dir)) THEN
1259 7154 : IF (dir == 'c' .OR. dir == 'C') THEN
1260 : docol = .TRUE.
1261 : ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1262 : docol = .FALSE.
1263 : ELSE
1264 0 : CPABORT('Wrong argument DIR')
1265 : END IF
1266 : ELSE
1267 : docol = .TRUE.
1268 : END IF
1269 :
1270 7194 : my_block => matrix%local_data
1271 :
1272 7194 : CPASSERT(.NOT. matrix%use_sp)
1273 :
1274 : CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1275 7194 : nrow_local=nrow_local, ncol_local=ncol_local)
1276 :
1277 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1278 221150 : sum_array(:) = 0.0_dp
1279 7194 : IF (docol) THEN
1280 448 : DO j = 1, ncol_local
1281 3628 : DO i = 1, nrow_local
1282 3588 : sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1283 : END DO
1284 : END DO
1285 : ELSE
1286 82094 : DO j = 1, ncol_local
1287 6429458 : DO i = 1, nrow_local
1288 6422304 : sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1289 : END DO
1290 : END DO
1291 : END IF
1292 435106 : CALL matrix%matrix_struct%para_env%sum(sum_array)
1293 :
1294 7194 : CALL timestop(handle)
1295 7194 : END SUBROUTINE
1296 :
1297 : ! **************************************************************************************************
1298 : !> \brief copy one identically sized matrix in the other
1299 : !> \param source ...
1300 : !> \param destination ...
1301 : !> \note
1302 : !> see also cp_fm_to_fm_columns
1303 : ! **************************************************************************************************
1304 358894 : SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1305 :
1306 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1307 :
1308 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
1309 :
1310 : INTEGER :: handle, npcol, nprow
1311 :
1312 358894 : CALL timeset(routineN, handle)
1313 :
1314 358894 : nprow = source%matrix_struct%context%num_pe(1)
1315 358894 : npcol = source%matrix_struct%context%num_pe(2)
1316 :
1317 358894 : IF ((.NOT. cp2k_is_parallel) .OR. &
1318 : cp_fm_struct_equivalent(source%matrix_struct, &
1319 : destination%matrix_struct)) THEN
1320 358894 : IF (source%use_sp .AND. destination%use_sp) THEN
1321 0 : IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1322 : SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
1323 : CALL cp_abort(__LOCATION__, &
1324 : "Cannot copy full matrix <"//TRIM(source%name)// &
1325 : "> to full matrix <"//TRIM(destination%name)// &
1326 0 : ">. The local_data blocks have different sizes.")
1327 : CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
1328 0 : source%local_data_sp, 1, destination%local_data_sp, 1)
1329 358894 : ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
1330 0 : IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
1331 : SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
1332 : CALL cp_abort(__LOCATION__, &
1333 : "Cannot copy full matrix <"//TRIM(source%name)// &
1334 : "> to full matrix <"//TRIM(destination%name)// &
1335 0 : ">. The local_data blocks have different sizes.")
1336 0 : destination%local_data = REAL(source%local_data_sp, dp)
1337 358894 : ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
1338 0 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1339 : SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
1340 : CALL cp_abort(__LOCATION__, &
1341 : "Cannot copy full matrix <"//TRIM(source%name)// &
1342 : "> to full matrix <"//TRIM(destination%name)// &
1343 0 : ">. The local_data blocks have different sizes.")
1344 0 : destination%local_data_sp = REAL(source%local_data, sp)
1345 : ELSE
1346 358894 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1347 : SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1348 : CALL cp_abort(__LOCATION__, &
1349 : "Cannot copy full matrix <"//TRIM(source%name)// &
1350 : "> to full matrix <"//TRIM(destination%name)// &
1351 0 : ">. The local_data blocks have different sizes.")
1352 : CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1353 358894 : source%local_data, 1, destination%local_data, 1)
1354 : END IF
1355 : ELSE
1356 0 : CPABORT("Data structures of source and target full matrix are not equivalent")
1357 : END IF
1358 :
1359 358894 : CALL timestop(handle)
1360 :
1361 358894 : END SUBROUTINE cp_fm_to_fm_matrix
1362 :
1363 : ! **************************************************************************************************
1364 : !> \brief copy just a subset of columns of a fm to a fm
1365 : !> \param msource ...
1366 : !> \param mtarget ...
1367 : !> \param ncol ...
1368 : !> \param source_start ...
1369 : !> \param target_start ...
1370 : ! **************************************************************************************************
1371 163594 : SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1372 : target_start)
1373 :
1374 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1375 : INTEGER, INTENT(IN) :: ncol
1376 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1377 :
1378 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
1379 :
1380 : INTEGER :: handle, n, ss, ts
1381 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1382 : #if defined(__parallel)
1383 : INTEGER :: i
1384 : INTEGER, DIMENSION(9) :: desca, descb
1385 : #endif
1386 :
1387 163594 : CALL timeset(routineN, handle)
1388 :
1389 163594 : ss = 1
1390 163594 : ts = 1
1391 :
1392 163594 : IF (PRESENT(source_start)) ss = source_start
1393 163594 : IF (PRESENT(target_start)) ts = target_start
1394 :
1395 163594 : n = msource%matrix_struct%nrow_global
1396 :
1397 163594 : a => msource%local_data
1398 163594 : b => mtarget%local_data
1399 :
1400 : #if defined(__parallel)
1401 1635940 : desca(:) = msource%matrix_struct%descriptor(:)
1402 1635940 : descb(:) = mtarget%matrix_struct%descriptor(:)
1403 655308 : DO i = 0, ncol - 1
1404 655308 : CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1405 : END DO
1406 : #else
1407 : IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
1408 : CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1409 : END IF
1410 : #endif
1411 :
1412 163594 : CALL timestop(handle)
1413 :
1414 163594 : END SUBROUTINE cp_fm_to_fm_columns
1415 :
1416 : ! **************************************************************************************************
1417 : !> \brief copy just a triangular matrix
1418 : !> \param msource ...
1419 : !> \param mtarget ...
1420 : !> \param uplo ...
1421 : ! **************************************************************************************************
1422 370 : SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1423 :
1424 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1425 : CHARACTER(LEN=1), OPTIONAL, INTENT(IN) :: uplo
1426 :
1427 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
1428 :
1429 : CHARACTER(LEN=1) :: myuplo
1430 : INTEGER :: handle, ncol, nrow
1431 370 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1432 : #if defined(__parallel)
1433 : INTEGER, DIMENSION(9) :: desca, descb
1434 : #endif
1435 :
1436 370 : CALL timeset(routineN, handle)
1437 :
1438 370 : myuplo = 'U'
1439 370 : IF (PRESENT(uplo)) myuplo = uplo
1440 :
1441 370 : nrow = msource%matrix_struct%nrow_global
1442 370 : ncol = msource%matrix_struct%ncol_global
1443 :
1444 370 : a => msource%local_data
1445 370 : b => mtarget%local_data
1446 :
1447 : #if defined(__parallel)
1448 3700 : desca(:) = msource%matrix_struct%descriptor(:)
1449 3700 : descb(:) = mtarget%matrix_struct%descriptor(:)
1450 370 : CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1451 : #else
1452 : CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1453 : #endif
1454 :
1455 370 : CALL timestop(handle)
1456 :
1457 370 : END SUBROUTINE cp_fm_to_fm_triangular
1458 :
1459 : ! **************************************************************************************************
1460 : !> \brief copy just a part ot the matrix
1461 : !> \param msource ...
1462 : !> \param mtarget ...
1463 : !> \param nrow ...
1464 : !> \param ncol ...
1465 : !> \param s_firstrow ...
1466 : !> \param s_firstcol ...
1467 : !> \param t_firstrow ...
1468 : !> \param t_firstcol ...
1469 : ! **************************************************************************************************
1470 :
1471 11152 : SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1472 :
1473 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1474 : INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1475 : s_firstcol, t_firstrow, &
1476 : t_firstcol
1477 :
1478 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
1479 :
1480 : INTEGER :: handle, i, na, nb, ss, ts
1481 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1482 : #if defined(__parallel)
1483 : INTEGER, DIMENSION(9) :: desca, descb
1484 : #endif
1485 :
1486 11152 : CALL timeset(routineN, handle)
1487 :
1488 11152 : a => msource%local_data
1489 11152 : b => mtarget%local_data
1490 :
1491 11152 : na = msource%matrix_struct%nrow_global
1492 11152 : nb = mtarget%matrix_struct%nrow_global
1493 : ! nrow must be <= na and nb
1494 11152 : IF (nrow > na) &
1495 0 : CPABORT("cannot copy because nrow > number of rows of source matrix")
1496 11152 : IF (nrow > nb) &
1497 0 : CPABORT("cannot copy because nrow > number of rows of target matrix")
1498 11152 : na = msource%matrix_struct%ncol_global
1499 11152 : nb = mtarget%matrix_struct%ncol_global
1500 : ! ncol must be <= na_col and nb_col
1501 11152 : IF (ncol > na) &
1502 0 : CPABORT("cannot copy because nrow > number of rows of source matrix")
1503 11152 : IF (ncol > nb) &
1504 0 : CPABORT("cannot copy because nrow > number of rows of target matrix")
1505 :
1506 : #if defined(__parallel)
1507 111520 : desca(:) = msource%matrix_struct%descriptor(:)
1508 111520 : descb(:) = mtarget%matrix_struct%descriptor(:)
1509 126112 : DO i = 0, ncol - 1
1510 114960 : ss = s_firstcol + i
1511 114960 : ts = t_firstcol + i
1512 126112 : CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1513 : END DO
1514 : #else
1515 : DO i = 0, ncol - 1
1516 : ss = s_firstcol + i
1517 : ts = t_firstcol + i
1518 : CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1519 : END DO
1520 : #endif
1521 :
1522 11152 : CALL timestop(handle)
1523 11152 : END SUBROUTINE cp_fm_to_fm_submat
1524 :
1525 : ! **************************************************************************************************
1526 : !> \brief General copy of a fm matrix to another fm matrix.
1527 : !> Uses non-blocking MPI rather than ScaLAPACK.
1528 : !>
1529 : !> \param source input fm matrix
1530 : !> \param destination output fm matrix
1531 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1532 : !> of the input and output matrices
1533 : !> \par History
1534 : !> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1535 : ! **************************************************************************************************
1536 8806 : SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1537 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1538 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1539 :
1540 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
1541 :
1542 : INTEGER :: handle
1543 79254 : TYPE(copy_info_type) :: info
1544 :
1545 8806 : CALL timeset(routineN, handle)
1546 :
1547 8806 : CALL cp_fm_start_copy_general(source, destination, para_env, info)
1548 8806 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1549 8794 : CALL cp_fm_finish_copy_general(destination, info)
1550 : END IF
1551 8806 : IF (ASSOCIATED(source%matrix_struct)) THEN
1552 8654 : CALL cp_fm_cleanup_copy_general(info)
1553 : END IF
1554 :
1555 8806 : CALL timestop(handle)
1556 8806 : END SUBROUTINE cp_fm_copy_general
1557 :
1558 : ! **************************************************************************************************
1559 : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
1560 : !> \param source input fm matrix
1561 : !> \param destination output fm matrix
1562 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1563 : !> of the input and output matrices
1564 : !> \param info all of the data that will be needed to complete the copy operation
1565 : ! **************************************************************************************************
1566 1594420 : SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1567 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1568 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1569 : TYPE(copy_info_type), INTENT(OUT) :: info
1570 :
1571 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
1572 :
1573 : INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1574 : ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1575 : nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1576 : recv_rank, recv_size, send_rank, send_size
1577 159442 : INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1578 318884 : recv_count, send_count, send_disp, &
1579 159442 : source2global, src_p, src_q
1580 159442 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1581 : INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1582 : src_block, src_block_tmp, src_num_pe
1583 318884 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1584 318884 : send_col_indices, send_row_indices
1585 : TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1586 2232188 : TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1587 :
1588 159442 : CALL timeset(routineN, handle)
1589 :
1590 : IF (.NOT. cp2k_is_parallel) THEN
1591 : ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1592 : nrow_local_send = SIZE(source%local_data, 1)
1593 : ncol_local_send = SIZE(source%local_data, 2)
1594 : ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1595 : k = 0
1596 : DO j = 1, ncol_local_send
1597 : DO i = 1, nrow_local_send
1598 : k = k + 1
1599 : info%send_buf(k) = source%local_data(i, j)
1600 : END DO
1601 : END DO
1602 : ELSE
1603 : ! assumption of double precision data is carried forward from ScaLAPACK version
1604 159442 : IF (source%use_sp) CPABORT("only DP kind implemented")
1605 159442 : IF (destination%use_sp) CPABORT("only DP kind implemented")
1606 :
1607 159442 : NULLIFY (recv_dist, send_dist)
1608 159442 : NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1609 :
1610 : ! The 'global' communicator contains both the source and destination decompositions
1611 159442 : global_size = para_env%num_pe
1612 159442 : global_rank = para_env%mepos
1613 :
1614 : ! The source/send decomposition and destination/recv decompositions may only exist on
1615 : ! on a subset of the processes involved in the communication
1616 : ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1617 : ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1618 159442 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1619 127470 : recv_dist => destination%matrix_struct
1620 127470 : recv_rank = recv_dist%para_env%mepos
1621 : ELSE
1622 31972 : recv_rank = mp_proc_null
1623 : END IF
1624 :
1625 159442 : IF (ASSOCIATED(source%matrix_struct)) THEN
1626 140922 : send_dist => source%matrix_struct
1627 140922 : send_rank = send_dist%para_env%mepos
1628 : ELSE
1629 18520 : send_rank = mp_proc_null
1630 : END IF
1631 :
1632 : ! Map the rank in the source/dest communicator to the global rank
1633 478326 : ALLOCATE (all_ranks(0:global_size - 1))
1634 :
1635 159442 : CALL para_env%allgather(send_rank, all_ranks)
1636 159442 : IF (ASSOCIATED(recv_dist)) THEN
1637 637350 : ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
1638 382410 : DO i = 0, global_size - 1
1639 382410 : IF (all_ranks(i) .NE. mp_proc_null) THEN
1640 217900 : source2global(all_ranks(i)) = i
1641 : END IF
1642 : END DO
1643 : END IF
1644 :
1645 159442 : CALL para_env%allgather(recv_rank, all_ranks)
1646 159442 : IF (ASSOCIATED(send_dist)) THEN
1647 704610 : ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
1648 422766 : DO i = 0, global_size - 1
1649 422766 : IF (all_ranks(i) .NE. mp_proc_null) THEN
1650 217900 : dest2global(all_ranks(i)) = i
1651 : END IF
1652 : END DO
1653 : END IF
1654 159442 : DEALLOCATE (all_ranks)
1655 :
1656 : ! Some data from the two decompositions will be needed by all processes in the global group :
1657 : ! process grid shape, block size, and the BLACS-to-MPI mapping
1658 :
1659 : ! The global root process will receive the data (from the root process in each decomposition)
1660 1116094 : send_req(:) = mp_request_null
1661 159442 : IF (global_rank == 0) THEN
1662 558047 : recv_req(:) = mp_request_null
1663 79721 : CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1664 79721 : CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1665 79721 : CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1666 79721 : CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1667 : END IF
1668 :
1669 159442 : IF (ASSOCIATED(send_dist)) THEN
1670 140922 : IF ((send_rank .EQ. 0)) THEN
1671 : ! need to use separate buffers here in case this is actually global rank 0
1672 239163 : src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
1673 79721 : CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1674 79721 : CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1675 : END IF
1676 : END IF
1677 :
1678 159442 : IF (ASSOCIATED(recv_dist)) THEN
1679 127470 : IF ((recv_rank .EQ. 0)) THEN
1680 239163 : dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
1681 79721 : CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1682 79721 : CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1683 : END IF
1684 : END IF
1685 :
1686 159442 : IF (global_rank == 0) THEN
1687 79721 : CALL mp_waitall(recv_req(1:4))
1688 : ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1689 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1690 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1691 558047 : )
1692 79721 : CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1693 79721 : CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1694 : END IF
1695 :
1696 159442 : IF (ASSOCIATED(send_dist)) THEN
1697 140922 : IF ((send_rank .EQ. 0)) THEN
1698 79721 : CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1699 : END IF
1700 : END IF
1701 :
1702 159442 : IF (ASSOCIATED(recv_dist)) THEN
1703 127470 : IF ((recv_rank .EQ. 0)) THEN
1704 79721 : CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1705 : END IF
1706 : END IF
1707 :
1708 159442 : IF (global_rank == 0) THEN
1709 79721 : CALL mp_waitall(recv_req(5:6))
1710 : END IF
1711 :
1712 : ! Finally, broadcast the data to all processes in the global communicator
1713 159442 : CALL para_env%bcast(src_block, 0)
1714 159442 : CALL para_env%bcast(dest_block, 0)
1715 159442 : CALL para_env%bcast(src_num_pe, 0)
1716 159442 : CALL para_env%bcast(dest_num_pe, 0)
1717 478326 : info%src_num_pe(1:2) = src_num_pe(1:2)
1718 478326 : info%nblock_src(1:2) = src_block(1:2)
1719 159442 : IF (global_rank .NE. 0) THEN
1720 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1721 0 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1722 558047 : )
1723 : END IF
1724 159442 : CALL para_env%bcast(info%src_blacs2mpi, 0)
1725 159442 : CALL para_env%bcast(dest_blacs2mpi, 0)
1726 :
1727 159442 : recv_size = dest_num_pe(1)*dest_num_pe(2)
1728 159442 : send_size = src_num_pe(1)*src_num_pe(2)
1729 159442 : info%send_size = send_size
1730 159442 : CALL mp_waitall(send_req(:))
1731 :
1732 : ! Setup is now complete, we can start the actual communication here.
1733 : ! The order implemented here is:
1734 : ! DEST_1
1735 : ! compute recv sizes
1736 : ! call irecv
1737 : ! SRC_1
1738 : ! compute send sizes
1739 : ! pack send buffers
1740 : ! call isend
1741 : ! DEST_2
1742 : ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1743 : ! SRC_2
1744 : ! wait for the sends
1745 :
1746 : ! DEST_1
1747 159442 : IF (ASSOCIATED(recv_dist)) THEN
1748 : CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1749 : col_indices=recv_col_indices &
1750 127470 : )
1751 127470 : info%recv_col_indices => recv_col_indices
1752 127470 : info%recv_row_indices => recv_row_indices
1753 127470 : nrow_block_src = src_block(1)
1754 127470 : ncol_block_src = src_block(2)
1755 855250 : ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1756 :
1757 : ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1758 127470 : nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1759 127470 : ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1760 127470 : info%nlocal_recv(1) = nrow_local_recv
1761 127470 : info%nlocal_recv(2) = ncol_local_recv
1762 : ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1763 637350 : ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1764 2422161 : DO i = 1, nrow_local_recv
1765 : ! For each local row we will receive, we look up its global row (in recv_row_indices),
1766 : ! then work out which row block it comes from, and which process row that row block comes from.
1767 2422161 : src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1768 : END DO
1769 4062996 : DO j = 1, ncol_local_recv
1770 : ! Similarly for the columns
1771 4062996 : src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1772 : END DO
1773 : ! src_p/q now contains the process row/column ID that will send data to that row/column
1774 :
1775 254940 : DO q = 0, src_num_pe(2) - 1
1776 4062996 : ncols = COUNT(src_q .EQ. q)
1777 472840 : DO p = 0, src_num_pe(1) - 1
1778 4430466 : nrows = COUNT(src_p .EQ. p)
1779 : ! Use the send_dist here as we are looking up the processes where the data comes from
1780 345370 : recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1781 : END DO
1782 : END DO
1783 127470 : DEALLOCATE (src_p, src_q)
1784 :
1785 : ! Use one long buffer (and displacements into that buffer)
1786 : ! this prevents the need for a rectangular array where not all elements will be populated
1787 600310 : ALLOCATE (info%recv_buf(SUM(recv_count(:))))
1788 127470 : info%recv_disp(0) = 0
1789 217900 : DO i = 1, send_size - 1
1790 217900 : info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1791 : END DO
1792 :
1793 : ! Issue receive calls on ranks which expect data
1794 345370 : DO k = 0, send_size - 1
1795 345370 : IF (recv_count(k) .GT. 0) THEN
1796 : CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1797 159886 : source2global(k), info%recv_request(k))
1798 : END IF
1799 : END DO
1800 127470 : DEALLOCATE (source2global)
1801 : END IF ! ASSOCIATED(recv_dist)
1802 :
1803 : ! SRC_1
1804 159442 : IF (ASSOCIATED(send_dist)) THEN
1805 : CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1806 : col_indices=send_col_indices &
1807 140922 : )
1808 140922 : nrow_block_dest = dest_block(1)
1809 140922 : ncol_block_dest = dest_block(2)
1810 922510 : ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1811 :
1812 : ! Determine the send counts, allocate the send buffers
1813 140922 : nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1814 140922 : ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1815 :
1816 : ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1817 : ! i.e. number of rows,cols in the sending distribution
1818 704610 : ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1819 :
1820 2435613 : DO i = 1, nrow_local_send
1821 : ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1822 2435613 : dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1823 : END DO
1824 4347180 : DO j = 1, ncol_local_send
1825 4347180 : dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1826 : END DO
1827 : ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1828 :
1829 281844 : DO q = 0, dest_num_pe(2) - 1
1830 4347180 : ncols = COUNT(dest_q .EQ. q)
1831 499744 : DO p = 0, dest_num_pe(1) - 1
1832 4160238 : nrows = COUNT(dest_p .EQ. p)
1833 358822 : send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1834 : END DO
1835 : END DO
1836 140922 : DEALLOCATE (dest_p, dest_q)
1837 :
1838 : ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1839 640666 : ALLOCATE (info%send_buf(SUM(send_count(:))))
1840 140922 : send_disp(0) = 0
1841 217900 : DO k = 1, recv_size - 1
1842 217900 : send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1843 : END DO
1844 :
1845 : ! Loop over the smat, pack the send buffers
1846 358822 : send_count(:) = 0
1847 4347180 : DO j = 1, ncol_local_send
1848 : ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1849 4206258 : dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1850 172252494 : DO i = 1, nrow_local_send
1851 167905314 : dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1852 167905314 : mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1853 167905314 : send_count(mpi_rank) = send_count(mpi_rank) + 1
1854 172111572 : info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1855 : END DO
1856 : END DO
1857 :
1858 : ! For each non-zero send_count, call mpi_isend
1859 358822 : DO k = 0, recv_size - 1
1860 358822 : IF (send_count(k) .GT. 0) THEN
1861 : CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1862 159886 : dest2global(k), info%send_request(k))
1863 : END IF
1864 : END DO
1865 140922 : DEALLOCATE (send_count, send_disp, dest2global)
1866 : END IF ! ASSOCIATED(send_dist)
1867 159442 : DEALLOCATE (dest_blacs2mpi)
1868 :
1869 : END IF !IF (.NOT. cp2k_is_parallel)
1870 :
1871 159442 : CALL timestop(handle)
1872 :
1873 637768 : END SUBROUTINE cp_fm_start_copy_general
1874 :
1875 : ! **************************************************************************************************
1876 : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1877 : !> \param destination output fm matrix
1878 : !> \param info all of the data that will be needed to complete the copy operation
1879 : ! **************************************************************************************************
1880 127470 : SUBROUTINE cp_fm_finish_copy_general(destination, info)
1881 : TYPE(cp_fm_type), INTENT(IN) :: destination
1882 : TYPE(copy_info_type), INTENT(INOUT) :: info
1883 :
1884 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
1885 :
1886 : INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1887 : src_p_i, src_q_j
1888 127470 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1889 : INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1890 127470 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1891 :
1892 127470 : CALL timeset(routineN, handle)
1893 :
1894 : IF (.NOT. cp2k_is_parallel) THEN
1895 : ! Now unpack the data from the 'send buffer'
1896 : k = 0
1897 : DO j = 1, SIZE(destination%local_data, 2)
1898 : DO i = 1, SIZE(destination%local_data, 1)
1899 : k = k + 1
1900 : destination%local_data(i, j) = info%send_buf(k)
1901 : END DO
1902 : END DO
1903 : DEALLOCATE (info%send_buf)
1904 : ELSE
1905 : ! Set up local variables ...
1906 127470 : send_size = info%send_size
1907 382410 : nlocal_recv(1:2) = info%nlocal_recv(:)
1908 382410 : nblock_src(1:2) = info%nblock_src(:)
1909 382410 : src_num_pe(1:2) = info%src_num_pe(:)
1910 127470 : recv_col_indices => info%recv_col_indices
1911 127470 : recv_row_indices => info%recv_row_indices
1912 :
1913 : ! ... use the local variables to do the work
1914 : ! DEST_2
1915 127470 : CALL mp_waitall(info%recv_request(:))
1916 382410 : ALLOCATE (recv_count(0:send_size - 1))
1917 : ! Loop over the rmat, filling it in with data from the recv buffers
1918 : ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1919 345370 : recv_count(:) = 0
1920 4062996 : DO j = 1, nlocal_recv(2)
1921 3935526 : src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
1922 171968310 : DO i = 1, nlocal_recv(1)
1923 167905314 : src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
1924 167905314 : mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
1925 167905314 : recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1926 171840840 : destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1927 : END DO
1928 : END DO
1929 127470 : DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1930 : ! Invalidate the stored state
1931 : NULLIFY (info%recv_col_indices, &
1932 127470 : info%recv_row_indices)
1933 :
1934 : END IF
1935 :
1936 127470 : CALL timestop(handle)
1937 :
1938 127470 : END SUBROUTINE cp_fm_finish_copy_general
1939 :
1940 : ! **************************************************************************************************
1941 : !> \brief Completes the copy operation: wait for comms clean up MPI state
1942 : !> \param info all of the data that will be needed to complete the copy operation
1943 : ! **************************************************************************************************
1944 140258 : SUBROUTINE cp_fm_cleanup_copy_general(info)
1945 : TYPE(copy_info_type), INTENT(INOUT) :: info
1946 :
1947 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
1948 :
1949 : INTEGER :: handle
1950 :
1951 140258 : CALL timeset(routineN, handle)
1952 :
1953 : IF (.NOT. cp2k_is_parallel) THEN
1954 : ! Don't do anything - no MPI state for the serial case
1955 : ELSE
1956 : ! SRC_2
1957 : ! If this process is also in the destination decomposition, this deallocate
1958 : ! Was already done in cp_fm_finish_copy_general
1959 140258 : IF (ALLOCATED(info%src_blacs2mpi)) THEN
1960 31308 : DEALLOCATE (info%src_blacs2mpi)
1961 : END IF
1962 140258 : CALL mp_waitall(info%send_request)
1963 140258 : DEALLOCATE (info%send_request, info%send_buf)
1964 :
1965 : END IF
1966 :
1967 140258 : CALL timestop(handle)
1968 :
1969 140258 : END SUBROUTINE cp_fm_cleanup_copy_general
1970 :
1971 : ! **************************************************************************************************
1972 : !> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
1973 : !> The two matrices can have different contexts.
1974 : !>
1975 : !> Summary of distribution routines for dense matrices
1976 : !> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
1977 : !>
1978 : !> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
1979 : !>
1980 : !> A process that is not a part of the context of A should set descA(2)
1981 : !> to -1, and similarly for B.
1982 : !>
1983 : !> \param source input fm matrix
1984 : !> \param destination output fm matrix
1985 : !> \param nrows number of rows of sub matrix to be copied
1986 : !> \param ncols number of cols of sub matrix to be copied
1987 : !> \param s_firstrow starting global row index of sub matrix in source
1988 : !> \param s_firstcol starting global col index of sub matrix in source
1989 : !> \param d_firstrow starting global row index of sub matrix in destination
1990 : !> \param d_firstcol starting global col index of sub matrix in destination
1991 : !> \param global_context process grid that covers all parts of either A or B.
1992 : ! **************************************************************************************************
1993 16506 : SUBROUTINE cp_fm_to_fm_submat_general(source, &
1994 : destination, &
1995 : nrows, &
1996 : ncols, &
1997 : s_firstrow, &
1998 : s_firstcol, &
1999 : d_firstrow, &
2000 : d_firstcol, &
2001 : global_context)
2002 :
2003 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
2004 : INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
2005 : d_firstrow, d_firstcol
2006 :
2007 : CLASS(cp_blacs_type), INTENT(IN) :: global_context
2008 :
2009 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
2010 :
2011 : LOGICAL :: debug
2012 : INTEGER :: handle
2013 : #if defined(__parallel)
2014 : INTEGER, DIMENSION(9) :: desca, descb
2015 : REAL(KIND=dp), DIMENSION(1, 1), TARGET :: dummy
2016 16506 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: smat, dmat
2017 : #endif
2018 :
2019 16506 : CALL timeset(routineN, handle)
2020 :
2021 16506 : debug = debug_this_module
2022 :
2023 : IF (.NOT. cp2k_is_parallel) THEN
2024 : CALL cp_fm_to_fm_submat(source, &
2025 : destination, &
2026 : nrows, &
2027 : ncols, &
2028 : s_firstrow, &
2029 : s_firstcol, &
2030 : d_firstrow, &
2031 : d_firstcol)
2032 : ELSE
2033 : #ifdef __parallel
2034 16506 : NULLIFY (smat, dmat)
2035 : ! check whether source is available on this process
2036 16506 : IF (ASSOCIATED(source%matrix_struct)) THEN
2037 165060 : desca = source%matrix_struct%descriptor
2038 16506 : IF (source%use_sp) CPABORT("only DP kind implemented")
2039 16506 : IF (nrows .GT. source%matrix_struct%nrow_global) &
2040 0 : CPABORT("nrows is greater than nrow_global of source")
2041 16506 : IF (ncols .GT. source%matrix_struct%ncol_global) &
2042 0 : CPABORT("ncols is greater than ncol_global of source")
2043 16506 : smat => source%local_data
2044 : ELSE
2045 0 : desca = -1
2046 0 : smat => dummy
2047 : END IF
2048 : ! check destination is available on this process
2049 16506 : IF (ASSOCIATED(destination%matrix_struct)) THEN
2050 165060 : descb = destination%matrix_struct%descriptor
2051 16506 : IF (destination%use_sp) CPABORT("only DP kind implemented")
2052 16506 : IF (nrows .GT. destination%matrix_struct%nrow_global) &
2053 0 : CPABORT("nrows is greater than nrow_global of destination")
2054 16506 : IF (ncols .GT. destination%matrix_struct%ncol_global) &
2055 0 : CPABORT("ncols is greater than ncol_global of destination")
2056 16506 : dmat => destination%local_data
2057 : ELSE
2058 0 : descb = -1
2059 0 : dmat => dummy
2060 : END IF
2061 : ! do copy
2062 :
2063 : CALL pdgemr2d(nrows, &
2064 : ncols, &
2065 : smat, &
2066 : s_firstrow, &
2067 : s_firstcol, &
2068 : desca, &
2069 : dmat, &
2070 : d_firstrow, &
2071 : d_firstcol, &
2072 : descb, &
2073 16506 : global_context%get_handle())
2074 : #else
2075 : MARK_USED(global_context)
2076 : CPABORT("this subroutine only supports SCALAPACK")
2077 : #endif
2078 : END IF
2079 :
2080 16506 : CALL timestop(handle)
2081 :
2082 16506 : END SUBROUTINE cp_fm_to_fm_submat_general
2083 :
2084 : ! **************************************************************************************************
2085 : !> \brief ...
2086 : !> \param matrix ...
2087 : !> \param irow_global ...
2088 : !> \param icol_global ...
2089 : !> \param alpha ...
2090 : ! **************************************************************************************************
2091 240 : SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2092 :
2093 : ! Add alpha to the matrix element specified by the global indices
2094 : ! irow_global and icol_global
2095 :
2096 : ! - Creation (05.05.06,MK)
2097 :
2098 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2099 : INTEGER, INTENT(IN) :: irow_global, icol_global
2100 : REAL(KIND=dp), INTENT(IN) :: alpha
2101 :
2102 : INTEGER :: mypcol, myprow, npcol, nprow
2103 240 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2104 : TYPE(cp_blacs_env_type), POINTER :: context
2105 : #if defined(__parallel)
2106 : INTEGER :: icol_local, ipcol, iprow, &
2107 : irow_local
2108 : INTEGER, DIMENSION(9) :: desca
2109 : #endif
2110 :
2111 240 : context => matrix%matrix_struct%context
2112 :
2113 240 : myprow = context%mepos(1)
2114 240 : mypcol = context%mepos(2)
2115 :
2116 240 : nprow = context%num_pe(1)
2117 240 : npcol = context%num_pe(2)
2118 :
2119 240 : a => matrix%local_data
2120 :
2121 : #if defined(__parallel)
2122 :
2123 2400 : desca(:) = matrix%matrix_struct%descriptor(:)
2124 :
2125 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2126 240 : irow_local, icol_local, iprow, ipcol)
2127 :
2128 240 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2129 120 : a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2130 : END IF
2131 :
2132 : #else
2133 :
2134 : a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2135 :
2136 : #endif
2137 :
2138 240 : END SUBROUTINE cp_fm_add_to_element
2139 :
2140 : ! **************************************************************************************************
2141 : !> \brief ...
2142 : !> \param fm ...
2143 : !> \param unit ...
2144 : ! **************************************************************************************************
2145 78144 : SUBROUTINE cp_fm_write_unformatted(fm, unit)
2146 : TYPE(cp_fm_type), INTENT(IN) :: fm
2147 : INTEGER, INTENT(IN) :: unit
2148 :
2149 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
2150 :
2151 : INTEGER :: handle, j, max_block, &
2152 : ncol_global, nrow_global
2153 : TYPE(mp_para_env_type), POINTER :: para_env
2154 : #if defined(__parallel)
2155 : INTEGER :: i, i_block, icol_local, &
2156 : in, info, ipcol, &
2157 : iprow, irow_local, &
2158 : mepos, &
2159 : num_pe, rb, tag
2160 : INTEGER, DIMENSION(9) :: desc
2161 78144 : REAL(KIND=dp), DIMENSION(:), POINTER :: vecbuf
2162 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: newdat
2163 : TYPE(cp_blacs_type) :: ictxt_loc
2164 : INTEGER, EXTERNAL :: numroc
2165 : #endif
2166 :
2167 78144 : CALL timeset(routineN, handle)
2168 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2169 78144 : para_env=para_env)
2170 :
2171 : #if defined(__parallel)
2172 78144 : num_pe = para_env%num_pe
2173 78144 : mepos = para_env%mepos
2174 78144 : rb = nrow_global
2175 78144 : tag = 0
2176 : ! get a new context
2177 78144 : CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2178 78144 : CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2179 78144 : CPASSERT(info == 0)
2180 : ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2181 : myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2182 156288 : in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2183 :
2184 312576 : ALLOCATE (newdat(nrow_global, MAX(1, in)))
2185 :
2186 : ! do the actual scalapack to cols reordering
2187 : CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2188 : fm%matrix_struct%descriptor, &
2189 78144 : newdat, 1, 1, desc, ictxt_loc%get_handle())
2190 :
2191 234432 : ALLOCATE (vecbuf(nrow_global*max_block))
2192 54891602 : vecbuf = HUGE(1.0_dp) ! init for valgrind
2193 :
2194 343597 : DO i = 1, ncol_global, MAX(max_block, 1)
2195 187309 : i_block = MIN(max_block, ncol_global - i + 1)
2196 : CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2197 187309 : irow_local, icol_local, iprow, ipcol)
2198 187309 : IF (ipcol == mypcol) THEN
2199 935351 : DO j = 1, i_block
2200 145946249 : vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2201 : END DO
2202 : END IF
2203 :
2204 187309 : IF (ipcol == 0) THEN
2205 : ! do nothing
2206 : ELSE
2207 64320 : IF (ipcol == mypcol) THEN
2208 36106610 : CALL para_env%send(vecbuf(:), 0, tag)
2209 : END IF
2210 64320 : IF (mypcol == 0) THEN
2211 72181060 : CALL para_env%recv(vecbuf(:), ipcol, tag)
2212 : END IF
2213 : END IF
2214 :
2215 265453 : IF (unit > 0) THEN
2216 905375 : DO j = 1, i_block
2217 905375 : WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2218 : END DO
2219 : END IF
2220 :
2221 : END DO
2222 : END ASSOCIATE
2223 78144 : DEALLOCATE (vecbuf)
2224 :
2225 78144 : CALL ictxt_loc%gridexit()
2226 :
2227 78144 : DEALLOCATE (newdat)
2228 :
2229 : #else
2230 :
2231 : IF (unit > 0) THEN
2232 : DO j = 1, ncol_global
2233 : WRITE (unit) fm%local_data(:, j)
2234 : END DO
2235 : END IF
2236 :
2237 : #endif
2238 78144 : CALL timestop(handle)
2239 :
2240 390720 : END SUBROUTINE cp_fm_write_unformatted
2241 :
2242 : ! **************************************************************************************************
2243 : !> \brief Write out a full matrix in plain text.
2244 : !> \param fm the matrix to be outputted
2245 : !> \param unit the unit number for I/O
2246 : !> \param header optional header
2247 : !> \param value_format ...
2248 : ! **************************************************************************************************
2249 1226 : SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2250 : TYPE(cp_fm_type), INTENT(IN) :: fm
2251 : INTEGER, INTENT(IN) :: unit
2252 : CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2253 :
2254 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
2255 :
2256 : CHARACTER(LEN=21) :: my_value_format
2257 : INTEGER :: handle, i, j, max_block, &
2258 : ncol_global, nrow_global
2259 : TYPE(mp_para_env_type), POINTER :: para_env
2260 : #if defined(__parallel)
2261 : INTEGER :: i_block, icol_local, &
2262 : in, info, ipcol, &
2263 : iprow, irow_local, &
2264 : mepos, num_pe, rb, tag, k, &
2265 : icol, irow
2266 : INTEGER, DIMENSION(9) :: desc
2267 1226 : REAL(KIND=dp), DIMENSION(:), POINTER :: vecbuf
2268 1226 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: newdat
2269 : TYPE(cp_blacs_type) :: ictxt_loc
2270 : INTEGER, EXTERNAL :: numroc
2271 : #endif
2272 :
2273 1226 : CALL timeset(routineN, handle)
2274 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2275 1226 : para_env=para_env)
2276 :
2277 1226 : IF (PRESENT(value_format)) THEN
2278 0 : CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
2279 0 : my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
2280 : ELSE
2281 1226 : my_value_format = "(I10, I10, ES24.12)"
2282 : END IF
2283 :
2284 1226 : IF (unit > 0) THEN
2285 5 : IF (PRESENT(header)) WRITE (unit, *) header
2286 5 : WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
2287 : END IF
2288 :
2289 : #if defined(__parallel)
2290 1226 : num_pe = para_env%num_pe
2291 1226 : mepos = para_env%mepos
2292 1226 : rb = nrow_global
2293 1226 : tag = 0
2294 : ! get a new context
2295 1226 : CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2296 1226 : CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2297 1226 : CPASSERT(info == 0)
2298 : ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2299 : myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2300 2452 : in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2301 :
2302 4904 : ALLOCATE (newdat(nrow_global, MAX(1, in)))
2303 :
2304 : ! do the actual scalapack to cols reordering
2305 : CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2306 : fm%matrix_struct%descriptor, &
2307 1226 : newdat, 1, 1, desc, ictxt_loc%get_handle())
2308 :
2309 3678 : ALLOCATE (vecbuf(nrow_global*max_block))
2310 4736 : vecbuf = HUGE(1.0_dp) ! init for valgrind
2311 1226 : irow = 1
2312 1226 : icol = 1
2313 :
2314 4898 : DO i = 1, ncol_global, MAX(max_block, 1)
2315 2446 : i_block = MIN(max_block, ncol_global - i + 1)
2316 : CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2317 2446 : irow_local, icol_local, iprow, ipcol)
2318 2446 : IF (ipcol == mypcol) THEN
2319 2469 : DO j = 1, i_block
2320 8553 : vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2321 : END DO
2322 : END IF
2323 :
2324 2446 : IF (ipcol == 0) THEN
2325 : ! do nothing
2326 : ELSE
2327 1220 : IF (ipcol == mypcol) THEN
2328 2196 : CALL para_env%send(vecbuf(:), 0, tag)
2329 : END IF
2330 1220 : IF (mypcol == 0) THEN
2331 3782 : CALL para_env%recv(vecbuf(:), ipcol, tag)
2332 : END IF
2333 : END IF
2334 :
2335 3672 : IF (unit > 0) THEN
2336 37 : DO j = 1, i_block
2337 647 : DO k = (j - 1)*nrow_global + 1, nrow_global*j
2338 610 : WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
2339 610 : irow = irow + 1
2340 640 : IF (irow > nrow_global) THEN
2341 30 : irow = 1
2342 30 : icol = icol + 1
2343 : END IF
2344 : END DO
2345 : END DO
2346 : END IF
2347 :
2348 : END DO
2349 : END ASSOCIATE
2350 1226 : DEALLOCATE (vecbuf)
2351 :
2352 1226 : CALL ictxt_loc%gridexit()
2353 :
2354 1226 : DEALLOCATE (newdat)
2355 :
2356 : #else
2357 :
2358 : IF (unit > 0) THEN
2359 : DO j = 1, ncol_global
2360 : DO i = 1, nrow_global
2361 : WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
2362 : END DO
2363 : END DO
2364 : END IF
2365 :
2366 : #endif
2367 1226 : CALL timestop(handle)
2368 :
2369 6130 : END SUBROUTINE cp_fm_write_formatted
2370 :
2371 : ! **************************************************************************************************
2372 : !> \brief ...
2373 : !> \param fm ...
2374 : !> \param unit ...
2375 : ! **************************************************************************************************
2376 1516 : SUBROUTINE cp_fm_read_unformatted(fm, unit)
2377 : TYPE(cp_fm_type), INTENT(INOUT) :: fm
2378 : INTEGER, INTENT(IN) :: unit
2379 :
2380 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
2381 :
2382 : INTEGER :: handle, j, max_block, &
2383 : ncol_global, nrow_global
2384 : TYPE(mp_para_env_type), POINTER :: para_env
2385 : #if defined(__parallel)
2386 : INTEGER :: k, n_cols
2387 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: vecbuf
2388 : #endif
2389 :
2390 1516 : CALL timeset(routineN, handle)
2391 :
2392 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2393 1516 : para_env=para_env)
2394 :
2395 : #if defined(__parallel)
2396 :
2397 : ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2398 :
2399 6064 : ALLOCATE (vecbuf(nrow_global, max_block))
2400 :
2401 5684 : DO j = 1, ncol_global, max_block
2402 :
2403 4168 : n_cols = MIN(max_block, ncol_global - j + 1)
2404 4168 : IF (para_env%mepos == 0) THEN
2405 32577 : DO k = 1, n_cols
2406 32577 : READ (unit) vecbuf(:, k)
2407 : END DO
2408 : END IF
2409 11541680 : CALL para_env%bcast(vecbuf, 0)
2410 5684 : CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2411 :
2412 : END DO
2413 :
2414 1516 : DEALLOCATE (vecbuf)
2415 :
2416 : #else
2417 :
2418 : DO j = 1, ncol_global
2419 : READ (unit) fm%local_data(:, j)
2420 : END DO
2421 :
2422 : #endif
2423 :
2424 1516 : CALL timestop(handle)
2425 :
2426 1516 : END SUBROUTINE cp_fm_read_unformatted
2427 :
2428 : ! **************************************************************************************************
2429 : !> \brief ...
2430 : !> \param mm_type ...
2431 : ! **************************************************************************************************
2432 9801 : SUBROUTINE cp_fm_setup(mm_type)
2433 : INTEGER, INTENT(IN) :: mm_type
2434 :
2435 9801 : cp_fm_mm_type = mm_type
2436 9801 : END SUBROUTINE cp_fm_setup
2437 :
2438 : ! **************************************************************************************************
2439 : !> \brief ...
2440 : !> \return ...
2441 : ! **************************************************************************************************
2442 1417028 : FUNCTION cp_fm_get_mm_type() RESULT(res)
2443 : INTEGER :: res
2444 :
2445 1417028 : res = cp_fm_mm_type
2446 1417028 : END FUNCTION
2447 :
2448 : ! **************************************************************************************************
2449 : !> \brief ...
2450 : !> \param ictxt ...
2451 : !> \param prec ...
2452 : !> \return ...
2453 : ! **************************************************************************************************
2454 10 : FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2455 : INTEGER :: ictxt
2456 : CHARACTER(LEN=1) :: prec
2457 : INTEGER :: res
2458 : #if defined(__parallel)
2459 : INTEGER :: pilaenv
2460 10 : res = pilaenv(ictxt, prec)
2461 : #else
2462 : MARK_USED(ictxt)
2463 : MARK_USED(prec)
2464 : res = -1
2465 : #endif
2466 :
2467 10 : END FUNCTION
2468 :
2469 0 : END MODULE cp_fm_types
|