Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief basic linear algebra operations for full matrices
10 : !> \par History
11 : !> 08.2002 split out of qs_blacs [fawzi]
12 : !> \author Fawzi Mohamed
13 : ! **************************************************************************************************
14 : MODULE cp_fm_basic_linalg
15 : USE cp_blacs_env, ONLY: cp_blacs_env_type
16 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
17 : USE cp_fm_types, ONLY: &
18 : cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
19 : cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
20 : cp_fm_type
21 : USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
22 : cp_to_string
23 : USE kahan_sum, ONLY: accurate_dot_product, &
24 : accurate_sum
25 : USE kinds, ONLY: dp, &
26 : int_8, &
27 : sp
28 : USE machine, ONLY: m_memory
29 : USE mathlib, ONLY: get_pseudo_inverse_svd, &
30 : invert_matrix
31 : USE message_passing, ONLY: mp_comm_type
32 : #include "../base/base_uses.f90"
33 :
34 : IMPLICIT NONE
35 : PRIVATE
36 :
37 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
38 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39 :
40 : PUBLIC :: cp_fm_scale, & ! scale a matrix
41 : cp_fm_scale_and_add, & ! scale and add two matrices
42 : cp_fm_geadd, & ! general addition
43 : cp_fm_column_scale, & ! scale columns of a matrix
44 : cp_fm_row_scale, & ! scale rows of a matrix
45 : cp_fm_trace, & ! trace of the transpose(A)*B
46 : cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
47 : cp_fm_norm, & ! different norms of A
48 : cp_fm_schur_product, & ! schur product
49 : cp_fm_transpose, & ! transpose a matrix
50 : cp_fm_upper_to_full, & ! symmetrise an upper symmetric matrix
51 : cp_fm_syrk, & ! rank k update
52 : cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
53 : cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
54 : cp_fm_gemm, & ! multiply two matrices
55 : cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
56 : cp_fm_invert, & ! computes the inverse and determinant
57 : cp_fm_frobenius_norm, & ! frobenius norm
58 : cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
59 : cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
60 : cp_fm_solve, & ! solves the equation A*B=C A and C are input
61 : cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
62 : cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
63 : cp_fm_potrf, & ! Cholesky decomposition
64 : cp_fm_potri, & ! Invert triangular matrix
65 : cp_fm_rot_rows, & ! rotates two rows
66 : cp_fm_rot_cols, & ! rotates two columns
67 : cp_fm_cholesky_restore, & ! apply Cholesky decomposition
68 : cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
69 : cp_fm_det, & ! determinant of a real matrix with correct sign
70 : cp_fm_matvec ! matrix-vector multiplication (vector replicated)
71 :
72 : REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
73 : REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
74 :
75 : INTERFACE cp_fm_trace
76 : MODULE PROCEDURE cp_fm_trace_a0b0t0
77 : MODULE PROCEDURE cp_fm_trace_a1b0t1_a
78 : MODULE PROCEDURE cp_fm_trace_a1b0t1_p
79 : MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
80 : MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
81 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
82 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
83 : END INTERFACE cp_fm_trace
84 :
85 : INTERFACE cp_fm_contracted_trace
86 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
87 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
88 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
89 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
90 : END INTERFACE cp_fm_contracted_trace
91 : CONTAINS
92 :
93 : ! **************************************************************************************************
94 : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
95 : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
96 : ! **************************************************************************************************
97 0 : SUBROUTINE cp_fm_det(matrix_a, det_a)
98 :
99 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
100 : REAL(KIND=dp), INTENT(OUT) :: det_a
101 : REAL(KIND=dp) :: determinant
102 : TYPE(cp_fm_type) :: matrix_lu
103 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
104 : INTEGER :: n, i, info, P
105 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
106 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
107 :
108 : #if defined(__parallel)
109 : INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
110 : INTEGER, DIMENSION(9) :: desca
111 : #endif
112 :
113 : CALL cp_fm_create(matrix=matrix_lu, &
114 : matrix_struct=matrix_a%matrix_struct, &
115 0 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
116 0 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
117 :
118 0 : a => matrix_lu%local_data
119 0 : n = matrix_lu%matrix_struct%nrow_global
120 0 : ALLOCATE (ipivot(n))
121 0 : ipivot(:) = 0
122 0 : P = 0
123 0 : ALLOCATE (diag(n))
124 0 : diag(:) = 0.0_dp
125 : #if defined(__parallel)
126 : ! Use LU decomposition
127 0 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
128 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
129 0 : CALL cp_fm_get_diag(matrix_lu, diag)
130 0 : determinant = PRODUCT(diag)
131 0 : myprow = matrix_lu%matrix_struct%context%mepos(1)
132 0 : nprow = matrix_lu%matrix_struct%context%num_pe(1)
133 0 : npcol = matrix_lu%matrix_struct%context%num_pe(2)
134 0 : nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
135 0 : nrow_block = matrix_lu%matrix_struct%nrow_block
136 0 : DO irow_local = 1, nrow_local
137 0 : i = matrix_lu%matrix_struct%row_indices(irow_local)
138 0 : IF (ipivot(irow_local) /= i) P = P + 1
139 : END DO
140 0 : CALL matrix_lu%matrix_struct%para_env%sum(P)
141 : ! very important fix
142 0 : P = P/npcol
143 : #else
144 : CALL dgetrf(n, n, a, n, ipivot, info)
145 : CALL cp_fm_get_diag(matrix_lu, diag)
146 : determinant = PRODUCT(diag)
147 : DO i = 1, n
148 : IF (ipivot(i) /= i) P = P + 1
149 : END DO
150 : #endif
151 0 : DEALLOCATE (ipivot)
152 0 : DEALLOCATE (diag)
153 0 : CALL cp_fm_release(matrix_lu)
154 0 : det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
155 0 : END SUBROUTINE cp_fm_det
156 :
157 : ! **************************************************************************************************
158 : !> \brief calc A <- alpha*A + beta*B
159 : !> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
160 : !> scale A)
161 : !> \param alpha ...
162 : !> \param matrix_a ...
163 : !> \param beta ...
164 : !> \param matrix_b ...
165 : ! **************************************************************************************************
166 1062116 : SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
167 :
168 : REAL(KIND=dp), INTENT(IN) :: alpha
169 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
170 : REAL(KIND=dp), INTENT(in), OPTIONAL :: beta
171 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
172 :
173 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
174 :
175 : INTEGER :: handle, size_a, size_b
176 : REAL(KIND=dp) :: my_beta
177 1062116 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
178 1062116 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
179 :
180 1062116 : CALL timeset(routineN, handle)
181 :
182 1062116 : IF (PRESENT(matrix_b)) THEN
183 1062116 : my_beta = 1.0_dp
184 : ELSE
185 0 : my_beta = 0.0_dp
186 : END IF
187 1062116 : IF (PRESENT(beta)) my_beta = beta
188 1062116 : NULLIFY (a, b)
189 :
190 1062116 : IF (PRESENT(beta)) THEN
191 1062116 : CPASSERT(PRESENT(matrix_b))
192 1062116 : IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
193 0 : CPWARN("Bad use of routine. Call cp_fm_scale instead")
194 0 : CALL cp_fm_scale(alpha + beta, matrix_a)
195 0 : CALL timestop(handle)
196 0 : RETURN
197 : END IF
198 : END IF
199 :
200 1062116 : a => matrix_a%local_data
201 1062116 : a_sp => matrix_a%local_data_sp
202 :
203 1062116 : IF (matrix_a%use_sp) THEN
204 0 : size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
205 : ELSE
206 1062116 : size_a = SIZE(a, 1)*SIZE(a, 2)
207 : END IF
208 :
209 1062116 : IF (alpha /= 1.0_dp) THEN
210 79066 : IF (matrix_a%use_sp) THEN
211 0 : CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
212 : ELSE
213 79066 : CALL dscal(size_a, alpha, a, 1)
214 : END IF
215 : END IF
216 1062116 : IF (my_beta .NE. 0.0_dp) THEN
217 1052690 : IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
218 0 : CPABORT("matrixes must be in the same blacs context")
219 :
220 1052690 : IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
221 : matrix_b%matrix_struct)) THEN
222 :
223 1052690 : b => matrix_b%local_data
224 1052690 : b_sp => matrix_b%local_data_sp
225 1052690 : IF (matrix_b%use_sp) THEN
226 0 : size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
227 : ELSE
228 1052690 : size_b = SIZE(b, 1)*SIZE(b, 2)
229 : END IF
230 1052690 : IF (size_a /= size_b) &
231 0 : CPABORT("Matrixes must have same locale sizes")
232 :
233 1052690 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
234 0 : CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
235 1052690 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
236 0 : CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
237 1052690 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
238 0 : CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
239 : ELSE
240 1052690 : CALL daxpy(size_a, my_beta, b, 1, a, 1)
241 : END IF
242 :
243 : ELSE
244 : #ifdef __parallel
245 0 : CPABORT("to do (pdscal,pdcopy,pdaxpy)")
246 : #else
247 : CPABORT("")
248 : #endif
249 : END IF
250 :
251 : END IF
252 :
253 1062116 : CALL timestop(handle)
254 :
255 1062116 : END SUBROUTINE cp_fm_scale_and_add
256 :
257 : ! **************************************************************************************************
258 : !> \brief interface to BLACS geadd:
259 : !> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
260 : !> where opt(matrix_a) can be either:
261 : !> 'N': matrix_a
262 : !> 'T': matrix_a^T
263 : !> 'C': matrix_a^H (Hermitian conjugate)
264 : !> note that this is a level three routine, use cp_fm_scale_and_add if that
265 : !> is sufficient for your needs
266 : !> \param alpha : complex scalar
267 : !> \param trans : 'N' normal, 'T' transposed
268 : !> \param matrix_a : input matrix_a
269 : !> \param beta : complex scalar
270 : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
271 : !> \author Lianheng Tong
272 : ! **************************************************************************************************
273 96 : SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
274 : REAL(KIND=dp), INTENT(IN) :: alpha, beta
275 : CHARACTER, INTENT(IN) :: trans
276 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
277 :
278 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
279 :
280 : INTEGER :: nrow_global, ncol_global, handle
281 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
282 : #if defined(__parallel)
283 : INTEGER, DIMENSION(9) :: desca, descb
284 : #else
285 : INTEGER :: ii, jj
286 : #endif
287 :
288 96 : CALL timeset(routineN, handle)
289 :
290 96 : nrow_global = matrix_a%matrix_struct%nrow_global
291 96 : ncol_global = matrix_a%matrix_struct%ncol_global
292 96 : CPASSERT(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
293 96 : CPASSERT(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
294 :
295 96 : aa => matrix_a%local_data
296 96 : bb => matrix_b%local_data
297 :
298 : #if defined(__parallel)
299 960 : desca = matrix_a%matrix_struct%descriptor
300 960 : descb = matrix_b%matrix_struct%descriptor
301 : CALL pdgeadd(trans, &
302 : nrow_global, &
303 : ncol_global, &
304 : alpha, &
305 : aa, &
306 : 1, 1, &
307 : desca, &
308 : beta, &
309 : bb, &
310 : 1, 1, &
311 96 : descb)
312 : #else
313 : ! dgeadd is not a standard BLAS function, although is implemented
314 : ! in some libraries like OpenBLAS, so not going to use it here
315 : SELECT CASE (trans)
316 : CASE ('T')
317 : DO jj = 1, ncol_global
318 : DO ii = 1, nrow_global
319 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
320 : END DO
321 : END DO
322 : CASE DEFAULT
323 : DO jj = 1, ncol_global
324 : DO ii = 1, nrow_global
325 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
326 : END DO
327 : END DO
328 : END SELECT
329 : #endif
330 :
331 96 : CALL timestop(handle)
332 :
333 96 : END SUBROUTINE cp_fm_geadd
334 :
335 : ! **************************************************************************************************
336 : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
337 : !> IMPORTANT : the sign of the determinant is not defined correctly yet ....
338 : !> \param matrix_a ...
339 : !> \param almost_determinant ...
340 : !> \param correct_sign ...
341 : !> \par History
342 : !> added correct_sign 02.07 (fschiff)
343 : !> \author Joost VandeVondele
344 : !> \note
345 : !> - matrix_a is overwritten
346 : !> - the sign of the determinant might be wrong
347 : !> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
348 : !> - one should be able to find out if ipivot is an even or an odd permutation...
349 : !> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
350 : !> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
351 : ! **************************************************************************************************
352 0 : SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
353 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
354 : REAL(KIND=dp), INTENT(OUT) :: almost_determinant
355 : LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
356 :
357 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
358 :
359 : INTEGER :: handle, i, info, n
360 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
361 : REAL(KIND=dp) :: determinant
362 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
363 : #if defined(__parallel)
364 : INTEGER, DIMENSION(9) :: desca
365 0 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
366 : #else
367 : INTEGER :: lda
368 : #endif
369 :
370 0 : CALL timeset(routineN, handle)
371 :
372 0 : a => matrix_a%local_data
373 0 : n = matrix_a%matrix_struct%nrow_global
374 0 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
375 :
376 : #if defined(__parallel)
377 : MARK_USED(correct_sign)
378 0 : desca(:) = matrix_a%matrix_struct%descriptor(:)
379 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
380 :
381 0 : ALLOCATE (diag(n))
382 0 : diag(:) = 0.0_dp
383 0 : CALL cp_fm_get_diag(matrix_a, diag)
384 0 : determinant = 1.0_dp
385 0 : DO i = 1, n
386 0 : determinant = determinant*diag(i)
387 : END DO
388 0 : DEALLOCATE (diag)
389 : #else
390 : lda = SIZE(a, 1)
391 : CALL dgetrf(n, n, a, lda, ipivot, info)
392 : determinant = 1.0_dp
393 : IF (correct_sign) THEN
394 : DO i = 1, n
395 : IF (ipivot(i) .NE. i) THEN
396 : determinant = -determinant*a(i, i)
397 : ELSE
398 : determinant = determinant*a(i, i)
399 : END IF
400 : END DO
401 : ELSE
402 : DO i = 1, n
403 : determinant = determinant*a(i, i)
404 : END DO
405 : END IF
406 : #endif
407 : ! info is allowed to be zero
408 : ! this does just signal a zero diagonal element
409 0 : DEALLOCATE (ipivot)
410 0 : almost_determinant = determinant ! notice that the sign is random
411 0 : CALL timestop(handle)
412 0 : END SUBROUTINE
413 :
414 : ! **************************************************************************************************
415 : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
416 : !> \param transa : 'N' -> normal 'T' -> transpose
417 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
418 : !> \param transb ...
419 : !> \param m ...
420 : !> \param n ...
421 : !> \param k ...
422 : !> \param alpha ...
423 : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
424 : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
425 : !> \param beta ...
426 : !> \param matrix_c : m x n matrix
427 : !> \param a_first_col ...
428 : !> \param a_first_row ...
429 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
430 : !> \param b_first_row ...
431 : !> \param c_first_col ...
432 : !> \param c_first_row ...
433 : !> \author Matthias Krack
434 : !> \note
435 : !> matrix_c should have no overlap with matrix_a, matrix_b
436 : ! **************************************************************************************************
437 550 : SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
438 : matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
439 : c_first_col, c_first_row)
440 :
441 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
442 : INTEGER, INTENT(IN) :: m, n, k
443 : REAL(KIND=dp), INTENT(IN) :: alpha
444 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
445 : REAL(KIND=dp), INTENT(IN) :: beta
446 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
447 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
448 : b_first_col, b_first_row, &
449 : c_first_col, c_first_row
450 :
451 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
452 :
453 : INTEGER :: handle, i_a, i_b, i_c, j_a, &
454 : j_b, j_c
455 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
456 550 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
457 : #if defined(__parallel)
458 : INTEGER, DIMENSION(9) :: desca, descb, descc
459 : #else
460 : INTEGER :: lda, ldb, ldc
461 : #endif
462 :
463 550 : CALL timeset(routineN, handle)
464 :
465 : !sample peak memory
466 550 : CALL m_memory()
467 :
468 550 : a => matrix_a%local_data
469 550 : b => matrix_b%local_data
470 550 : c => matrix_c%local_data
471 :
472 550 : a_sp => matrix_a%local_data_sp
473 550 : b_sp => matrix_b%local_data_sp
474 550 : c_sp => matrix_c%local_data_sp
475 :
476 550 : IF (PRESENT(a_first_row)) THEN
477 0 : i_a = a_first_row
478 : ELSE
479 550 : i_a = 1
480 : END IF
481 550 : IF (PRESENT(a_first_col)) THEN
482 0 : j_a = a_first_col
483 : ELSE
484 550 : j_a = 1
485 : END IF
486 550 : IF (PRESENT(b_first_row)) THEN
487 0 : i_b = b_first_row
488 : ELSE
489 550 : i_b = 1
490 : END IF
491 550 : IF (PRESENT(b_first_col)) THEN
492 0 : j_b = b_first_col
493 : ELSE
494 550 : j_b = 1
495 : END IF
496 550 : IF (PRESENT(c_first_row)) THEN
497 0 : i_c = c_first_row
498 : ELSE
499 550 : i_c = 1
500 : END IF
501 550 : IF (PRESENT(c_first_col)) THEN
502 0 : j_c = c_first_col
503 : ELSE
504 550 : j_c = 1
505 : END IF
506 :
507 : #if defined(__parallel)
508 :
509 5500 : desca(:) = matrix_a%matrix_struct%descriptor(:)
510 5500 : descb(:) = matrix_b%matrix_struct%descriptor(:)
511 5500 : descc(:) = matrix_c%matrix_struct%descriptor(:)
512 :
513 550 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
514 :
515 : CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
516 0 : descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
517 :
518 550 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
519 :
520 : CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
521 550 : descb, beta, c, i_c, j_c, descc)
522 :
523 : ELSE
524 0 : CPABORT("Mixed precision gemm NYI")
525 : END IF
526 : #else
527 :
528 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
529 :
530 : lda = SIZE(a_sp, 1)
531 : ldb = SIZE(b_sp, 1)
532 : ldc = SIZE(c_sp, 1)
533 :
534 : CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
535 : REAL(beta, sp), c_sp(i_c, j_c), ldc)
536 :
537 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
538 :
539 : lda = SIZE(a, 1)
540 : ldb = SIZE(b, 1)
541 : ldc = SIZE(c, 1)
542 :
543 : CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
544 :
545 : ELSE
546 : CPABORT("Mixed precision gemm NYI")
547 : END IF
548 :
549 : #endif
550 550 : CALL timestop(handle)
551 :
552 550 : END SUBROUTINE cp_fm_gemm
553 :
554 : ! **************************************************************************************************
555 : !> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
556 : !> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
557 : !> where matrix_a is symmetric
558 : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
559 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
560 : !> \param uplo ...
561 : !> \param m ...
562 : !> \param n ...
563 : !> \param alpha ...
564 : !> \param matrix_a : m x m matrix
565 : !> \param matrix_b : m x n matrix
566 : !> \param beta ...
567 : !> \param matrix_c : m x n matrix
568 : !> \author Matthias Krack
569 : !> \note
570 : !> matrix_c should have no overlap with matrix_a, matrix_b
571 : !> all matrices in QS are upper triangular, so uplo should be 'U' always
572 : !> matrix_a is always an m x m matrix
573 : !> it is typically slower to do cp_fm_symm than cp_fm_gemm (especially in parallel easily 50 percent !)
574 : ! **************************************************************************************************
575 143496 : SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
576 :
577 : CHARACTER(LEN=1), INTENT(IN) :: side, uplo
578 : INTEGER, INTENT(IN) :: m, n
579 : REAL(KIND=dp), INTENT(IN) :: alpha
580 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
581 : REAL(KIND=dp), INTENT(IN) :: beta
582 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
583 :
584 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
585 :
586 : INTEGER :: handle
587 143496 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
588 : #if defined(__parallel)
589 : INTEGER, DIMENSION(9) :: desca, descb, descc
590 : #else
591 : INTEGER :: lda, ldb, ldc
592 : #endif
593 :
594 143496 : CALL timeset(routineN, handle)
595 :
596 143496 : a => matrix_a%local_data
597 143496 : b => matrix_b%local_data
598 143496 : c => matrix_c%local_data
599 :
600 : #if defined(__parallel)
601 :
602 1434960 : desca(:) = matrix_a%matrix_struct%descriptor(:)
603 1434960 : descb(:) = matrix_b%matrix_struct%descriptor(:)
604 1434960 : descc(:) = matrix_c%matrix_struct%descriptor(:)
605 :
606 143496 : CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
607 :
608 : #else
609 :
610 : lda = matrix_a%matrix_struct%local_leading_dimension
611 : ldb = matrix_b%matrix_struct%local_leading_dimension
612 : ldc = matrix_c%matrix_struct%local_leading_dimension
613 :
614 : CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
615 :
616 : #endif
617 143496 : CALL timestop(handle)
618 :
619 143496 : END SUBROUTINE cp_fm_symm
620 :
621 : ! **************************************************************************************************
622 : !> \brief computes the Frobenius norm of matrix_a
623 : !> \brief computes the Frobenius norm of matrix_a
624 : !> \param matrix_a : m x n matrix
625 : !> \return ...
626 : !> \author VW
627 : ! **************************************************************************************************
628 8030 : FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
629 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
630 : REAL(KIND=dp) :: norm
631 :
632 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
633 :
634 : INTEGER :: handle, size_a
635 8030 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
636 : REAL(KIND=dp), EXTERNAL :: DDOT
637 : #if defined(__parallel)
638 : TYPE(mp_comm_type) :: group
639 : #endif
640 :
641 8030 : CALL timeset(routineN, handle)
642 :
643 : norm = 0.0_dp
644 8030 : a => matrix_a%local_data
645 8030 : size_a = SIZE(a, 1)*SIZE(a, 2)
646 8030 : norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
647 : #if defined(__parallel)
648 8030 : group = matrix_a%matrix_struct%para_env
649 8030 : CALL group%sum(norm)
650 : #endif
651 8030 : norm = SQRT(norm)
652 :
653 8030 : CALL timestop(handle)
654 :
655 8030 : END FUNCTION cp_fm_frobenius_norm
656 :
657 : ! **************************************************************************************************
658 : !> \brief performs a rank-k update of a symmetric matrix_c
659 : !> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
660 : !> \param uplo : 'U' ('L')
661 : !> \param trans : 'N' ('T')
662 : !> \param k : number of cols to use in matrix_a
663 : !> ia,ja :: 1,1 (could be used for selecting subblock of a)
664 : !> \param alpha ...
665 : !> \param matrix_a ...
666 : !> \param ia ...
667 : !> \param ja ...
668 : !> \param beta ...
669 : !> \param matrix_c ...
670 : !> \author Matthias Krack
671 : !> \note
672 : !> In QS uplo should 'U' (upper part updated)
673 : ! **************************************************************************************************
674 7006 : SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
675 : CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
676 : INTEGER, INTENT(IN) :: k
677 : REAL(KIND=dp), INTENT(IN) :: alpha
678 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
679 : INTEGER, INTENT(IN) :: ia, ja
680 : REAL(KIND=dp), INTENT(IN) :: beta
681 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
682 :
683 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
684 :
685 : INTEGER :: handle, n
686 7006 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
687 : #if defined(__parallel)
688 : INTEGER, DIMENSION(9) :: desca, descc
689 : #else
690 : INTEGER :: lda, ldc
691 : #endif
692 :
693 7006 : CALL timeset(routineN, handle)
694 :
695 7006 : n = matrix_c%matrix_struct%nrow_global
696 :
697 7006 : a => matrix_a%local_data
698 7006 : c => matrix_c%local_data
699 :
700 : #if defined(__parallel)
701 :
702 70060 : desca(:) = matrix_a%matrix_struct%descriptor(:)
703 70060 : descc(:) = matrix_c%matrix_struct%descriptor(:)
704 :
705 7006 : CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
706 :
707 : #else
708 :
709 : lda = SIZE(a, 1)
710 : ldc = SIZE(c, 1)
711 :
712 : CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
713 :
714 : #endif
715 7006 : CALL timestop(handle)
716 :
717 7006 : END SUBROUTINE cp_fm_syrk
718 :
719 : ! **************************************************************************************************
720 : !> \brief computes the schur product of two matrices
721 : !> c_ij = a_ij * b_ij
722 : !> \param matrix_a ...
723 : !> \param matrix_b ...
724 : !> \param matrix_c ...
725 : !> \author Joost VandeVondele
726 : ! **************************************************************************************************
727 9190 : SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
728 :
729 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
730 :
731 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
732 :
733 : INTEGER :: handle, icol_local, irow_local, mypcol, &
734 : myprow, ncol_local, npcol, nprow, &
735 : nrow_local
736 9190 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
737 : TYPE(cp_blacs_env_type), POINTER :: context
738 :
739 9190 : CALL timeset(routineN, handle)
740 :
741 9190 : context => matrix_a%matrix_struct%context
742 9190 : myprow = context%mepos(1)
743 9190 : mypcol = context%mepos(2)
744 9190 : nprow = context%num_pe(1)
745 9190 : npcol = context%num_pe(2)
746 :
747 9190 : a => matrix_a%local_data
748 9190 : b => matrix_b%local_data
749 9190 : c => matrix_c%local_data
750 :
751 9190 : nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
752 9190 : ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
753 :
754 99952 : DO icol_local = 1, ncol_local
755 6860227 : DO irow_local = 1, nrow_local
756 6851037 : c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
757 : END DO
758 : END DO
759 :
760 9190 : CALL timestop(handle)
761 :
762 9190 : END SUBROUTINE cp_fm_schur_product
763 :
764 : ! **************************************************************************************************
765 : !> \brief returns the trace of matrix_a^T matrix_b, i.e
766 : !> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
767 : !> \param matrix_a a matrix
768 : !> \param matrix_b another matrix
769 : !> \param trace ...
770 : !> \par History
771 : !> 11.06.2001 Creation (Matthias Krack)
772 : !> 12.2002 added doc [fawzi]
773 : !> \author Matthias Krack
774 : !> \note
775 : !> note the transposition of matrix_a!
776 : ! **************************************************************************************************
777 693910 : SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
778 :
779 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
780 : REAL(KIND=dp), INTENT(OUT) :: trace
781 :
782 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
783 :
784 : INTEGER :: handle, mypcol, myprow, ncol_local, &
785 : npcol, nprow, nrow_local
786 693910 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
787 693910 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
788 : TYPE(cp_blacs_env_type), POINTER :: context
789 : TYPE(mp_comm_type) :: group
790 :
791 693910 : CALL timeset(routineN, handle)
792 :
793 693910 : context => matrix_a%matrix_struct%context
794 693910 : myprow = context%mepos(1)
795 693910 : mypcol = context%mepos(2)
796 693910 : nprow = context%num_pe(1)
797 693910 : npcol = context%num_pe(2)
798 :
799 693910 : group = matrix_a%matrix_struct%para_env
800 :
801 693910 : a => matrix_a%local_data
802 693910 : b => matrix_b%local_data
803 :
804 693910 : a_sp => matrix_a%local_data_sp
805 693910 : b_sp => matrix_b%local_data_sp
806 :
807 693910 : nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
808 693910 : ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
809 :
810 : ! cries for an accurate_dot_product
811 693910 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
812 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
813 0 : b_sp(1:nrow_local, 1:ncol_local), dp))
814 693910 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
815 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
816 0 : b(1:nrow_local, 1:ncol_local))
817 693910 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
818 : trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
819 0 : REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
820 : ELSE
821 : trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
822 693910 : b(1:nrow_local, 1:ncol_local))
823 : END IF
824 :
825 693910 : CALL group%sum(trace)
826 :
827 693910 : CALL timestop(handle)
828 :
829 693910 : END SUBROUTINE cp_fm_trace_a0b0t0
830 :
831 : #:mute
832 : #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
833 : #:endmute
834 :
835 : ! **************************************************************************************************
836 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
837 : !> \param matrix_a list of A matrices
838 : !> \param matrix_b B matrix
839 : !> \param trace computed traces
840 : !> \par History
841 : !> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
842 : !> \note \parblock
843 : !> Computing the trace requires collective communication between involved MPI processes
844 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
845 : !> the amount of time wasted in such synchronisation by performing one large collective
846 : !> operation which involves all the matrices in question.
847 : !>
848 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
849 : !> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
850 : !> 'matrix_b' is a single matrix.
851 : !> \endparblock
852 : ! **************************************************************************************************
853 : #:for longname, shortname, appendix in types
854 3030 : SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
855 : TYPE(${longname}$), DIMENSION(:), INTENT(in) :: matrix_a
856 : TYPE(cp_fm_type), INTENT(IN) :: matrix_b
857 : REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
858 :
859 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
860 :
861 : INTEGER :: handle, imatrix, n_matrices, &
862 : ncols_local, nrows_local
863 : LOGICAL :: use_sp_a, use_sp_b
864 3030 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
865 3030 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
866 : TYPE(mp_comm_type) :: group
867 :
868 3030 : CALL timeset(routineN, handle)
869 :
870 3030 : n_matrices = SIZE(trace)
871 3030 : CPASSERT(SIZE(matrix_a) == n_matrices)
872 :
873 3030 : CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
874 3030 : use_sp_b = matrix_b%use_sp
875 :
876 3030 : IF (use_sp_b) THEN
877 0 : ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
878 : ELSE
879 3030 : ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
880 : END IF
881 :
882 : !$OMP PARALLEL DO DEFAULT(NONE), &
883 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
884 : !$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
885 3030 : !$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
886 :
887 : DO imatrix = 1, n_matrices
888 :
889 : use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
890 :
891 : ! assume that the matrices A(i) and B have identical shapes and distribution schemes
892 : IF (use_sp_a .AND. use_sp_b) THEN
893 : ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
894 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
895 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
896 : ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
897 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
898 : ELSE
899 : CPABORT("Matrices A and B are of different types")
900 : END IF
901 : END DO
902 : !$OMP END PARALLEL DO
903 :
904 3030 : group = matrix_b%matrix_struct%para_env
905 18882 : CALL group%sum(trace)
906 :
907 3030 : CALL timestop(handle)
908 3030 : END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
909 : #:endfor
910 :
911 : ! **************************************************************************************************
912 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
913 : !> \param matrix_a list of A matrices
914 : !> \param matrix_b list of B matrices
915 : !> \param trace computed traces
916 : !> \param accurate ...
917 : !> \par History
918 : !> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
919 : !> \note \parblock
920 : !> Computing the trace requires collective communication between involved MPI processes
921 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
922 : !> the amount of time wasted in such synchronisation by performing one large collective
923 : !> operation which involves all the matrices in question.
924 : !>
925 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
926 : !> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
927 : !> \endparblock
928 : ! **************************************************************************************************
929 : #:for longname1, shortname1, appendix1 in types
930 : #:for longname2, shortname2, appendix2 in types
931 138648 : SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
932 : TYPE(${longname1}$), DIMENSION(:), INTENT(in) :: matrix_a
933 : TYPE(${longname2}$), DIMENSION(:), INTENT(in) :: matrix_b
934 : REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace
935 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
936 :
937 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
938 :
939 : INTEGER :: handle, imatrix, n_matrices, &
940 : ncols_local, nrows_local
941 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
942 138648 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
943 138648 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
944 : TYPE(mp_comm_type) :: group
945 :
946 138648 : CALL timeset(routineN, handle)
947 :
948 138648 : n_matrices = SIZE(trace)
949 138648 : CPASSERT(SIZE(matrix_a) == n_matrices)
950 138648 : CPASSERT(SIZE(matrix_b) == n_matrices)
951 :
952 138648 : use_accurate_sum = .TRUE.
953 138648 : IF (PRESENT(accurate)) use_accurate_sum = accurate
954 :
955 : !$OMP PARALLEL DO DEFAULT(NONE), &
956 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
957 : !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
958 138648 : !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
959 : DO imatrix = 1, n_matrices
960 : CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
961 :
962 : use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
963 : use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
964 :
965 : ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
966 : IF (use_sp_a .AND. use_sp_b) THEN
967 : ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
968 : ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
969 : IF (use_accurate_sum) THEN
970 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
971 : ELSE
972 : trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
973 : END IF
974 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
975 : ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
976 : ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
977 : IF (use_accurate_sum) THEN
978 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
979 : ELSE
980 : trace(imatrix) = SUM(ldata_a*ldata_b)
981 : END IF
982 : ELSE
983 : CPABORT("Matrices A and B are of different types")
984 : END IF
985 : END DO
986 : !$OMP END PARALLEL DO
987 :
988 138648 : group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
989 460424 : CALL group%sum(trace)
990 :
991 138648 : CALL timestop(handle)
992 138648 : END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
993 : #:endfor
994 : #:endfor
995 :
996 : ! **************************************************************************************************
997 : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
998 : !> \param matrix_a list of A matrices
999 : !> \param matrix_b list of B matrices
1000 : !> \param trace computed traces
1001 : !> \param accurate ...
1002 : ! **************************************************************************************************
1003 : #:for longname1, shortname1, appendix1 in types
1004 : #:for longname2, shortname2, appendix2 in types
1005 13816 : SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
1006 : TYPE(${longname1}$), DIMENSION(:, :), INTENT(in) :: matrix_a
1007 : TYPE(${longname2}$), DIMENSION(:, :), INTENT(in) :: matrix_b
1008 : REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace
1009 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
1010 :
1011 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
1012 :
1013 : INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1014 : nrows_local, nz
1015 : INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1016 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1017 : REAL(kind=dp) :: t
1018 13816 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1019 13816 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1020 : TYPE(mp_comm_type) :: group
1021 :
1022 13816 : CALL timeset(routineN, handle)
1023 :
1024 13816 : nz = SIZE(matrix_a, 1)
1025 13816 : CPASSERT(SIZE(matrix_b, 1) == nz)
1026 :
1027 13816 : na = SIZE(matrix_a, 2)
1028 13816 : nb = SIZE(matrix_b, 2)
1029 13816 : CPASSERT(SIZE(trace, 1) == na)
1030 13816 : CPASSERT(SIZE(trace, 2) == nb)
1031 :
1032 13816 : use_accurate_sum = .TRUE.
1033 13816 : IF (PRESENT(accurate)) use_accurate_sum = accurate
1034 :
1035 : ! here we use one running index (itrace) instead of two (ia, ib) in order to
1036 : ! improve load balance between shared-memory threads
1037 13816 : ntraces = na*nb
1038 13816 : na8 = INT(na, kind=int_8)
1039 :
1040 : !$OMP PARALLEL DO DEFAULT(NONE), &
1041 : !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1042 : !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1043 13816 : !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1044 : DO itrace = 1, ntraces
1045 : ib8 = (itrace - 1)/na8
1046 : ia = INT(itrace - ib8*na8)
1047 : ib = INT(ib8) + 1
1048 :
1049 : t = 0.0_dp
1050 : DO iz = 1, nz
1051 : CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
1052 : use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
1053 : use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
1054 :
1055 : ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1056 : IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1057 : ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
1058 : ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
1059 : IF (use_accurate_sum) THEN
1060 : t = t + accurate_dot_product(ldata_a, ldata_b)
1061 : ELSE
1062 : t = t + SUM(ldata_a*ldata_b)
1063 : END IF
1064 : ELSE IF (use_sp_a .AND. use_sp_b) THEN
1065 : ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
1066 : ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
1067 : IF (use_accurate_sum) THEN
1068 : t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1069 : ELSE
1070 : t = t + SUM(ldata_a_sp*ldata_b_sp)
1071 : END IF
1072 : ELSE
1073 : CPABORT("Matrices A and B are of different types")
1074 : END IF
1075 : END DO
1076 : trace(ia, ib) = t
1077 : END DO
1078 : !$OMP END PARALLEL DO
1079 :
1080 13816 : group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
1081 617500 : CALL group%sum(trace)
1082 :
1083 13816 : CALL timestop(handle)
1084 13816 : END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
1085 : #:endfor
1086 : #:endfor
1087 :
1088 : ! **************************************************************************************************
1089 : !> \brief multiplies in place by a triangular matrix:
1090 : !> matrix_b = alpha op(triangular_matrix) matrix_b
1091 : !> or (if side='R')
1092 : !> matrix_b = alpha matrix_b op(triangular_matrix)
1093 : !> op(triangular_matrix) is:
1094 : !> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1095 : !> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1096 : !> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1097 : !> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1098 : !> \param triangular_matrix the triangular matrix that multiplies the other
1099 : !> \param matrix_b the matrix that gets multiplied and stores the result
1100 : !> \param side on which side of matrix_b stays op(triangular_matrix)
1101 : !> (defaults to 'L')
1102 : !> \param transpose_tr if the triangular matrix should be transposed
1103 : !> (defaults to false)
1104 : !> \param invert_tr if the triangular matrix should be inverted
1105 : !> (defaults to false)
1106 : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1107 : !> lower ('L') triangle (defaults to 'U')
1108 : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1109 : !> be assumed to be 1 (defaults to false)
1110 : !> \param n_rows the number of rows of the result (defaults to
1111 : !> size(matrix_b,1))
1112 : !> \param n_cols the number of columns of the result (defaults to
1113 : !> size(matrix_b,2))
1114 : !> \param alpha ...
1115 : !> \par History
1116 : !> 08.2002 created [fawzi]
1117 : !> \author Fawzi Mohamed
1118 : !> \note
1119 : !> needs an mpi env
1120 : ! **************************************************************************************************
1121 104694 : SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1122 : transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1123 : alpha)
1124 : TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1125 : CHARACTER, INTENT(in), OPTIONAL :: side
1126 : LOGICAL, INTENT(in), OPTIONAL :: transpose_tr, invert_tr
1127 : CHARACTER, INTENT(in), OPTIONAL :: uplo_tr
1128 : LOGICAL, INTENT(in), OPTIONAL :: unit_diag_tr
1129 : INTEGER, INTENT(in), OPTIONAL :: n_rows, n_cols
1130 : REAL(KIND=dp), INTENT(in), OPTIONAL :: alpha
1131 :
1132 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
1133 :
1134 : CHARACTER :: side_char, transa, unit_diag, uplo
1135 : INTEGER :: handle, m, n
1136 : LOGICAL :: invert
1137 : REAL(KIND=dp) :: al
1138 :
1139 52347 : CALL timeset(routineN, handle)
1140 52347 : side_char = 'L'
1141 52347 : unit_diag = 'N'
1142 52347 : uplo = 'U'
1143 52347 : transa = 'N'
1144 52347 : invert = .FALSE.
1145 52347 : al = 1.0_dp
1146 52347 : CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1147 52347 : IF (PRESENT(side)) side_char = side
1148 52347 : IF (PRESENT(invert_tr)) invert = invert_tr
1149 52347 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1150 52347 : IF (PRESENT(unit_diag_tr)) THEN
1151 0 : IF (unit_diag_tr) THEN
1152 0 : unit_diag = 'U'
1153 : ELSE
1154 : unit_diag = 'N'
1155 : END IF
1156 : END IF
1157 52347 : IF (PRESENT(transpose_tr)) THEN
1158 3540 : IF (transpose_tr) THEN
1159 1280 : transa = 'T'
1160 : ELSE
1161 : transa = 'N'
1162 : END IF
1163 : END IF
1164 52347 : IF (PRESENT(alpha)) al = alpha
1165 52347 : IF (PRESENT(n_rows)) m = n_rows
1166 52347 : IF (PRESENT(n_cols)) n = n_cols
1167 :
1168 52347 : IF (invert) THEN
1169 :
1170 : #if defined(__parallel)
1171 : CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1172 : triangular_matrix%local_data(1, 1), 1, 1, &
1173 : triangular_matrix%matrix_struct%descriptor, &
1174 : matrix_b%local_data(1, 1), 1, 1, &
1175 42597 : matrix_b%matrix_struct%descriptor(1))
1176 : #else
1177 : CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1178 : triangular_matrix%local_data(1, 1), &
1179 : SIZE(triangular_matrix%local_data, 1), &
1180 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1181 : #endif
1182 :
1183 : ELSE
1184 :
1185 : #if defined(__parallel)
1186 : CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1187 : triangular_matrix%local_data(1, 1), 1, 1, &
1188 : triangular_matrix%matrix_struct%descriptor, &
1189 : matrix_b%local_data(1, 1), 1, 1, &
1190 9750 : matrix_b%matrix_struct%descriptor(1))
1191 : #else
1192 : CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1193 : triangular_matrix%local_data(1, 1), &
1194 : SIZE(triangular_matrix%local_data, 1), &
1195 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1196 : #endif
1197 :
1198 : END IF
1199 :
1200 52347 : CALL timestop(handle)
1201 52347 : END SUBROUTINE cp_fm_triangular_multiply
1202 :
1203 : ! **************************************************************************************************
1204 : !> \brief scales a matrix
1205 : !> matrix_a = alpha * matrix_b
1206 : !> \param alpha ...
1207 : !> \param matrix_a ...
1208 : !> \note
1209 : !> use cp_fm_set_all to zero (avoids problems with nan)
1210 : ! **************************************************************************************************
1211 84017 : SUBROUTINE cp_fm_scale(alpha, matrix_a)
1212 : REAL(KIND=dp), INTENT(IN) :: alpha
1213 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1214 :
1215 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale'
1216 :
1217 : INTEGER :: handle, size_a
1218 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1219 :
1220 84017 : CALL timeset(routineN, handle)
1221 :
1222 : NULLIFY (a)
1223 :
1224 84017 : a => matrix_a%local_data
1225 84017 : size_a = SIZE(a, 1)*SIZE(a, 2)
1226 :
1227 84017 : CALL DSCAL(size_a, alpha, a, 1)
1228 :
1229 84017 : CALL timestop(handle)
1230 :
1231 84017 : END SUBROUTINE cp_fm_scale
1232 :
1233 : ! **************************************************************************************************
1234 : !> \brief transposes a matrix
1235 : !> matrixt = matrix ^ T
1236 : !> \param matrix ...
1237 : !> \param matrixt ...
1238 : !> \note
1239 : !> all matrix elements are transposed (see cp_fm_upper_to_half to symmetrise a matrix)
1240 : ! **************************************************************************************************
1241 26502 : SUBROUTINE cp_fm_transpose(matrix, matrixt)
1242 : TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1243 :
1244 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
1245 :
1246 : INTEGER :: handle, ncol_global, &
1247 : nrow_global, ncol_globalt, nrow_globalt
1248 13251 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
1249 : #if defined(__parallel)
1250 : INTEGER, DIMENSION(9) :: desca, descc
1251 : #else
1252 : INTEGER :: i, j
1253 : #endif
1254 :
1255 13251 : nrow_global = matrix%matrix_struct%nrow_global
1256 13251 : ncol_global = matrix%matrix_struct%ncol_global
1257 13251 : nrow_globalt = matrixt%matrix_struct%nrow_global
1258 13251 : ncol_globalt = matrixt%matrix_struct%ncol_global
1259 0 : CPASSERT(nrow_global == ncol_globalt)
1260 13251 : CPASSERT(nrow_globalt == ncol_global)
1261 :
1262 13251 : CALL timeset(routineN, handle)
1263 :
1264 13251 : a => matrix%local_data
1265 13251 : c => matrixt%local_data
1266 :
1267 : #if defined(__parallel)
1268 132510 : desca(:) = matrix%matrix_struct%descriptor(:)
1269 132510 : descc(:) = matrixt%matrix_struct%descriptor(:)
1270 13251 : CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1271 : #else
1272 : DO j = 1, ncol_global
1273 : DO i = 1, nrow_global
1274 : c(j, i) = a(i, j)
1275 : END DO
1276 : END DO
1277 : #endif
1278 13251 : CALL timestop(handle)
1279 :
1280 13251 : END SUBROUTINE cp_fm_transpose
1281 :
1282 : ! **************************************************************************************************
1283 : !> \brief given an upper triangular matrix computes the corresponding full matrix
1284 : !> \param matrix the upper triangular matrix as input, the full matrix as output
1285 : !> \param work a matrix of the same size as matrix
1286 : !> \author Matthias Krack
1287 : !> \note
1288 : !> the lower triangular part is irrelevant
1289 : ! **************************************************************************************************
1290 312452 : SUBROUTINE cp_fm_upper_to_full(matrix, work)
1291 :
1292 : TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1293 :
1294 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_upper_to_full'
1295 :
1296 : INTEGER :: handle, icol_global, irow_global, &
1297 : mypcol, myprow, ncol_global, &
1298 : npcol, nprow, nrow_global
1299 156226 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1300 156226 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1301 : TYPE(cp_blacs_env_type), POINTER :: context
1302 :
1303 : #if defined(__parallel)
1304 : INTEGER :: icol_local, irow_local, &
1305 : ncol_block, ncol_local, &
1306 : nrow_block, nrow_local
1307 : INTEGER, DIMENSION(9) :: desca, descc
1308 156226 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: c
1309 156226 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: c_sp
1310 : #endif
1311 :
1312 156226 : nrow_global = matrix%matrix_struct%nrow_global
1313 156226 : ncol_global = matrix%matrix_struct%ncol_global
1314 0 : CPASSERT(nrow_global == ncol_global)
1315 156226 : nrow_global = work%matrix_struct%nrow_global
1316 156226 : ncol_global = work%matrix_struct%ncol_global
1317 156226 : CPASSERT(nrow_global == ncol_global)
1318 156226 : CPASSERT(matrix%use_sp .EQV. work%use_sp)
1319 :
1320 156226 : CALL timeset(routineN, handle)
1321 :
1322 156226 : context => matrix%matrix_struct%context
1323 156226 : myprow = context%mepos(1)
1324 156226 : mypcol = context%mepos(2)
1325 156226 : nprow = context%num_pe(1)
1326 156226 : npcol = context%num_pe(2)
1327 :
1328 : #if defined(__parallel)
1329 :
1330 156226 : nrow_block = matrix%matrix_struct%nrow_block
1331 156226 : ncol_block = matrix%matrix_struct%ncol_block
1332 :
1333 156226 : nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1334 156226 : ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1335 :
1336 156226 : a => work%local_data
1337 156226 : a_sp => work%local_data_sp
1338 1562260 : desca(:) = work%matrix_struct%descriptor(:)
1339 156226 : c => matrix%local_data
1340 156226 : c_sp => matrix%local_data_sp
1341 1562260 : descc(:) = matrix%matrix_struct%descriptor(:)
1342 :
1343 5232190 : DO icol_local = 1, ncol_local
1344 5075964 : icol_global = matrix%matrix_struct%col_indices(icol_local)
1345 238720986 : DO irow_local = 1, nrow_local
1346 233488796 : irow_global = matrix%matrix_struct%row_indices(irow_local)
1347 238564760 : IF (irow_global > icol_global) THEN
1348 115223895 : IF (matrix%use_sp) THEN
1349 0 : c_sp(irow_local, icol_local) = 0.0_sp
1350 : ELSE
1351 115223895 : c(irow_local, icol_local) = 0.0_dp
1352 : END IF
1353 118264901 : ELSE IF (irow_global == icol_global) THEN
1354 3041006 : IF (matrix%use_sp) THEN
1355 0 : c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1356 : ELSE
1357 3041006 : c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1358 : END IF
1359 : END IF
1360 : END DO
1361 : END DO
1362 :
1363 5232190 : DO icol_local = 1, ncol_local
1364 238720986 : DO irow_local = 1, nrow_local
1365 238564760 : IF (matrix%use_sp) THEN
1366 0 : a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1367 : ELSE
1368 233488796 : a(irow_local, icol_local) = c(irow_local, icol_local)
1369 : END IF
1370 : END DO
1371 : END DO
1372 :
1373 156226 : IF (matrix%use_sp) THEN
1374 0 : CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1375 : ELSE
1376 156226 : CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1377 : END IF
1378 :
1379 : #else
1380 :
1381 : a => matrix%local_data
1382 : a_sp => matrix%local_data_sp
1383 : DO irow_global = 1, nrow_global
1384 : DO icol_global = irow_global + 1, ncol_global
1385 : IF (matrix%use_sp) THEN
1386 : a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1387 : ELSE
1388 : a(icol_global, irow_global) = a(irow_global, icol_global)
1389 : END IF
1390 : END DO
1391 : END DO
1392 :
1393 : #endif
1394 156226 : CALL timestop(handle)
1395 :
1396 156226 : END SUBROUTINE cp_fm_upper_to_full
1397 :
1398 : ! **************************************************************************************************
1399 : !> \brief scales column i of matrix a with scaling(i)
1400 : !> \param matrixa ...
1401 : !> \param scaling : an array used for scaling the columns,
1402 : !> SIZE(scaling) determines the number of columns to be scaled
1403 : !> \author Joost VandeVondele
1404 : !> \note
1405 : !> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1406 : !> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1407 : !> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1408 : !> where every vector has a different prefactor
1409 : ! **************************************************************************************************
1410 126558 : SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1411 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1412 : REAL(KIND=dp), DIMENSION(:), INTENT(in) :: scaling
1413 :
1414 : INTEGER :: k, mypcol, myprow, n, ncol_global, &
1415 : npcol, nprow
1416 126558 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1417 126558 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1418 : #if defined(__parallel)
1419 : INTEGER :: icol_global, icol_local, &
1420 : ipcol, iprow, irow_local
1421 : #else
1422 : INTEGER :: i
1423 : #endif
1424 :
1425 126558 : myprow = matrixa%matrix_struct%context%mepos(1)
1426 126558 : mypcol = matrixa%matrix_struct%context%mepos(2)
1427 126558 : nprow = matrixa%matrix_struct%context%num_pe(1)
1428 126558 : npcol = matrixa%matrix_struct%context%num_pe(2)
1429 :
1430 126558 : ncol_global = matrixa%matrix_struct%ncol_global
1431 :
1432 126558 : a => matrixa%local_data
1433 126558 : a_sp => matrixa%local_data_sp
1434 126558 : IF (matrixa%use_sp) THEN
1435 0 : n = SIZE(a_sp, 1)
1436 : ELSE
1437 126558 : n = SIZE(a, 1)
1438 : END IF
1439 126558 : k = MIN(SIZE(scaling), ncol_global)
1440 :
1441 : #if defined(__parallel)
1442 :
1443 2052275 : DO icol_global = 1, k
1444 : CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1445 : nprow, npcol, myprow, mypcol, &
1446 1925717 : irow_local, icol_local, iprow, ipcol)
1447 2052275 : IF ((ipcol == mypcol)) THEN
1448 1925717 : IF (matrixa%use_sp) THEN
1449 0 : CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1450 : ELSE
1451 1925717 : CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
1452 : END IF
1453 : END IF
1454 : END DO
1455 : #else
1456 : DO i = 1, k
1457 : IF (matrixa%use_sp) THEN
1458 : CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
1459 : ELSE
1460 : CALL DSCAL(n, scaling(i), a(:, i), 1)
1461 : END IF
1462 : END DO
1463 : #endif
1464 126558 : END SUBROUTINE cp_fm_column_scale
1465 :
1466 : ! **************************************************************************************************
1467 : !> \brief scales row i of matrix a with scaling(i)
1468 : !> \param matrixa ...
1469 : !> \param scaling : an array used for scaling the columns,
1470 : !> \author JGH
1471 : !> \note
1472 : ! **************************************************************************************************
1473 6564 : SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1474 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1475 : REAL(KIND=dp), DIMENSION(:), INTENT(in) :: scaling
1476 :
1477 : INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1478 6564 : INTEGER, DIMENSION(:), POINTER :: row_indices
1479 6564 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1480 6564 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1481 : #if defined(__parallel)
1482 : INTEGER :: irow_global, icol, irow
1483 : #else
1484 : INTEGER :: j
1485 : #endif
1486 :
1487 : CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1488 6564 : nrow_local=nrow_local, ncol_local=ncol_local)
1489 6564 : CPASSERT(SIZE(scaling) == nrow_global)
1490 :
1491 6564 : a => matrixa%local_data
1492 6564 : a_sp => matrixa%local_data_sp
1493 6564 : IF (matrixa%use_sp) THEN
1494 6564 : n = SIZE(a_sp, 1)
1495 6564 : m = SIZE(a_sp, 2)
1496 : ELSE
1497 6564 : n = SIZE(a, 1)
1498 6564 : m = SIZE(a, 2)
1499 : END IF
1500 :
1501 : #if defined(__parallel)
1502 81426 : DO icol = 1, ncol_local
1503 81426 : IF (matrixa%use_sp) THEN
1504 0 : DO irow = 1, nrow_local
1505 0 : irow_global = row_indices(irow)
1506 0 : a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
1507 : END DO
1508 : ELSE
1509 6667421 : DO irow = 1, nrow_local
1510 6592559 : irow_global = row_indices(irow)
1511 6667421 : a(irow, icol) = scaling(irow_global)*a(irow, icol)
1512 : END DO
1513 : END IF
1514 : END DO
1515 : #else
1516 : IF (matrixa%use_sp) THEN
1517 : DO j = 1, m
1518 : a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
1519 : END DO
1520 : ELSE
1521 : DO j = 1, m
1522 : a(1:n, j) = scaling(1:n)*a(1:n, j)
1523 : END DO
1524 : END IF
1525 : #endif
1526 6564 : END SUBROUTINE cp_fm_row_scale
1527 : ! **************************************************************************************************
1528 : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
1529 : !> \param matrix_a the matrix to invert
1530 : !> \param matrix_inverse the inverse of matrix_a
1531 : !> \param det_a the determinant of matrix_a
1532 : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
1533 : !> are screened
1534 : !> \param eigval optionally return matrix eigenvalues/singular values
1535 : !> \par History
1536 : !> note of Jan Wilhelm (12.2015)
1537 : !> - computation of determinant corrected
1538 : !> - determinant only computed if det_a is present
1539 : !> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
1540 : !> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
1541 : !> \author Florian Schiffmann(02.2007)
1542 : ! **************************************************************************************************
1543 1336 : SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
1544 :
1545 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
1546 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: det_a
1547 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_svd
1548 : REAL(KIND=dp), DIMENSION(:), POINTER, &
1549 : INTENT(INOUT), OPTIONAL :: eigval
1550 :
1551 : INTEGER :: n
1552 1336 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1553 : REAL(KIND=dp) :: determinant, my_eps_svd
1554 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1555 : TYPE(cp_fm_type) :: matrix_lu
1556 :
1557 : #if defined(__parallel)
1558 : TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
1559 : TYPE(mp_comm_type) :: group
1560 : INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
1561 : INTEGER, DIMENSION(9) :: desca
1562 : LOGICAL :: quenched
1563 : REAL(KIND=dp) :: alpha, beta
1564 1336 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
1565 1336 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
1566 : #else
1567 : LOGICAL :: sign
1568 : REAL(KIND=dp) :: eps1
1569 : #endif
1570 :
1571 1336 : my_eps_svd = 0.0_dp
1572 468 : IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
1573 :
1574 : CALL cp_fm_create(matrix=matrix_lu, &
1575 : matrix_struct=matrix_a%matrix_struct, &
1576 1336 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
1577 1336 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
1578 :
1579 1336 : a => matrix_lu%local_data
1580 1336 : n = matrix_lu%matrix_struct%nrow_global
1581 4008 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1582 18866 : ipivot(:) = 0
1583 : #if defined(__parallel)
1584 1336 : IF (my_eps_svd .EQ. 0.0_dp) THEN
1585 : ! Use LU decomposition
1586 1308 : lwork = 3*n
1587 1308 : liwork = 3*n
1588 13080 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1589 1308 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1590 :
1591 1308 : IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
1592 :
1593 1116 : ALLOCATE (diag(n))
1594 916 : diag(:) = 0.0_dp
1595 372 : CALL cp_fm_get_diag(matrix_lu, diag)
1596 :
1597 372 : exponent_of_minus_one = 0
1598 372 : determinant = 1.0_dp
1599 916 : DO i = 1, n
1600 544 : determinant = determinant*diag(i)
1601 916 : IF (ipivot(i) .NE. i) THEN
1602 224 : exponent_of_minus_one = exponent_of_minus_one + 1
1603 : END IF
1604 : END DO
1605 372 : IF (PRESENT(eigval)) THEN
1606 0 : CPASSERT(.NOT. ASSOCIATED(eigval))
1607 0 : ALLOCATE (eigval(n))
1608 0 : eigval(:) = diag
1609 : END IF
1610 372 : DEALLOCATE (diag)
1611 :
1612 372 : group = matrix_lu%matrix_struct%para_env
1613 372 : CALL group%sum(exponent_of_minus_one)
1614 :
1615 372 : determinant = determinant*(-1.0_dp)**exponent_of_minus_one
1616 :
1617 : END IF
1618 :
1619 1308 : alpha = 0.0_dp
1620 1308 : beta = 1.0_dp
1621 1308 : CALL cp_fm_set_all(matrix_inverse, alpha, beta)
1622 1308 : CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
1623 : ELSE
1624 : ! Use singular value decomposition
1625 : CALL cp_fm_create(matrix=u, &
1626 : matrix_struct=matrix_a%matrix_struct, &
1627 28 : name="LEFT_SINGULAR_MATRIX")
1628 28 : CALL cp_fm_set_all(u, alpha=0.0_dp)
1629 : CALL cp_fm_create(matrix=vt, &
1630 : matrix_struct=matrix_a%matrix_struct, &
1631 28 : name="RIGHT_SINGULAR_MATRIX")
1632 28 : CALL cp_fm_set_all(vt, alpha=0.0_dp)
1633 84 : ALLOCATE (diag(n))
1634 92 : diag(:) = 0.0_dp
1635 280 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1636 28 : ALLOCATE (work(1))
1637 : ! Workspace query
1638 28 : lwork = -1
1639 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1640 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1641 28 : lwork = INT(work(1))
1642 28 : DEALLOCATE (work)
1643 84 : ALLOCATE (work(lwork))
1644 : ! SVD
1645 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1646 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1647 : ! info == n+1 implies homogeneity error when the number of procs is large
1648 : ! this likely isnt a problem, but maybe we should handle it separately
1649 28 : IF (info /= 0 .AND. info /= n + 1) &
1650 0 : CPABORT("Singular value decomposition of matrix failed.")
1651 : ! (Pseudo)inverse and (pseudo)determinant
1652 : CALL cp_fm_create(matrix=sigma, &
1653 : matrix_struct=matrix_a%matrix_struct, &
1654 28 : name="SINGULAR_VALUE_MATRIX")
1655 28 : CALL cp_fm_set_all(sigma, alpha=0.0_dp)
1656 28 : determinant = 1.0_dp
1657 28 : quenched = .FALSE.
1658 28 : IF (PRESENT(eigval)) THEN
1659 28 : CPASSERT(.NOT. ASSOCIATED(eigval))
1660 84 : ALLOCATE (eigval(n))
1661 156 : eigval(:) = diag
1662 : END IF
1663 92 : DO i = 1, n
1664 64 : IF (diag(i) < my_eps_svd) THEN
1665 18 : diag(i) = 0.0_dp
1666 18 : quenched = .TRUE.
1667 : ELSE
1668 46 : determinant = determinant*diag(i)
1669 46 : diag(i) = 1.0_dp/diag(i)
1670 : END IF
1671 92 : CALL cp_fm_set_element(sigma, i, i, diag(i))
1672 : END DO
1673 28 : DEALLOCATE (diag)
1674 28 : IF (quenched) &
1675 : CALL cp_warn(__LOCATION__, &
1676 : "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
1677 12 : ". At least one singular value has been quenched.")
1678 : ! Sigma^-1 * U^T
1679 : CALL cp_fm_create(matrix=inv_sigma_ut, &
1680 : matrix_struct=matrix_a%matrix_struct, &
1681 28 : name="SINGULAR_VALUE_MATRIX")
1682 28 : CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
1683 : CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
1684 28 : u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
1685 : ! A^-1 = V * (Sigma^-1 * U^T)
1686 28 : CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
1687 : CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
1688 28 : inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
1689 : ! Clean up
1690 28 : DEALLOCATE (work)
1691 28 : CALL cp_fm_release(u)
1692 28 : CALL cp_fm_release(vt)
1693 28 : CALL cp_fm_release(sigma)
1694 140 : CALL cp_fm_release(inv_sigma_ut)
1695 : END IF
1696 : #else
1697 : IF (my_eps_svd .EQ. 0.0_dp) THEN
1698 : sign = .TRUE.
1699 : CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
1700 : eval_error=eps1)
1701 : CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
1702 : IF (PRESENT(eigval)) &
1703 : CALL cp_abort(__LOCATION__, &
1704 : "NYI. Eigenvalues not available for return without SCALAPACK.")
1705 : ELSE
1706 : CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
1707 : determinant, eigval)
1708 : END IF
1709 : #endif
1710 1336 : CALL cp_fm_release(matrix_lu)
1711 1336 : DEALLOCATE (ipivot)
1712 1336 : IF (PRESENT(det_a)) det_a = determinant
1713 1336 : END SUBROUTINE cp_fm_invert
1714 :
1715 : ! **************************************************************************************************
1716 : !> \brief inverts a triangular matrix
1717 : !> \param matrix_a ...
1718 : !> \param uplo_tr ...
1719 : !> \author MI
1720 : ! **************************************************************************************************
1721 5034 : SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
1722 :
1723 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1724 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1725 :
1726 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
1727 :
1728 : CHARACTER :: unit_diag, uplo
1729 : INTEGER :: handle, info, ncol_global
1730 5034 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1731 : #if defined(__parallel)
1732 : INTEGER, DIMENSION(9) :: desca
1733 : #endif
1734 :
1735 5034 : CALL timeset(routineN, handle)
1736 :
1737 5034 : unit_diag = 'N'
1738 5034 : uplo = 'U'
1739 5034 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1740 :
1741 5034 : ncol_global = matrix_a%matrix_struct%ncol_global
1742 :
1743 5034 : a => matrix_a%local_data
1744 :
1745 : #if defined(__parallel)
1746 :
1747 50340 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1748 :
1749 5034 : CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
1750 :
1751 : #else
1752 : CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
1753 : #endif
1754 :
1755 5034 : CALL timestop(handle)
1756 5034 : END SUBROUTINE cp_fm_triangular_invert
1757 :
1758 : ! **************************************************************************************************
1759 : !> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
1760 : !> the computed upper triangular matrix R is in output in the submatrix sub(A) of size NxN
1761 : !> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
1762 : !> \param matrix_a ...
1763 : !> \param matrix_r ...
1764 : !> \param nrow_fact ...
1765 : !> \param ncol_fact ...
1766 : !> \param first_row ...
1767 : !> \param first_col ...
1768 : !> \author MI
1769 : ! **************************************************************************************************
1770 19320 : SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
1771 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
1772 : INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
1773 : first_row, first_col
1774 :
1775 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
1776 :
1777 : INTEGER :: handle, i, icol, info, irow, &
1778 : j, lda, lwork, ncol, &
1779 : ndim, nrow
1780 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
1781 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
1782 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1783 : #if defined(__parallel)
1784 : INTEGER, DIMENSION(9) :: desca
1785 : #endif
1786 :
1787 19320 : CALL timeset(routineN, handle)
1788 :
1789 19320 : ncol = matrix_a%matrix_struct%ncol_global
1790 19320 : nrow = matrix_a%matrix_struct%nrow_global
1791 19320 : lda = nrow
1792 :
1793 19320 : a => matrix_a%local_data
1794 :
1795 19320 : IF (PRESENT(nrow_fact)) nrow = nrow_fact
1796 19320 : IF (PRESENT(ncol_fact)) ncol = ncol_fact
1797 19320 : irow = 1
1798 19320 : IF (PRESENT(first_row)) irow = first_row
1799 19320 : icol = 1
1800 19320 : IF (PRESENT(first_col)) icol = first_col
1801 :
1802 19320 : CPASSERT(nrow >= ncol)
1803 19320 : ndim = SIZE(a, 2)
1804 : ! ALLOCATE(ipiv(ndim))
1805 57960 : ALLOCATE (tau(ndim))
1806 :
1807 : #if defined(__parallel)
1808 :
1809 193200 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1810 :
1811 19320 : lwork = -1
1812 57960 : ALLOCATE (work(2*ndim))
1813 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1814 19320 : lwork = INT(work(1))
1815 19320 : DEALLOCATE (work)
1816 57960 : ALLOCATE (work(lwork))
1817 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1818 :
1819 : #else
1820 : lwork = -1
1821 : ALLOCATE (work(2*ndim))
1822 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1823 : lwork = INT(work(1))
1824 : DEALLOCATE (work)
1825 : ALLOCATE (work(lwork))
1826 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1827 :
1828 : #endif
1829 :
1830 77280 : ALLOCATE (r_mat(ncol, ncol))
1831 19320 : CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
1832 38640 : DO i = 1, ncol
1833 38640 : DO j = i + 1, ncol
1834 19320 : r_mat(j, i) = 0.0_dp
1835 : END DO
1836 : END DO
1837 19320 : CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
1838 :
1839 19320 : DEALLOCATE (tau, work, r_mat)
1840 :
1841 19320 : CALL timestop(handle)
1842 :
1843 19320 : END SUBROUTINE cp_fm_qr_factorization
1844 :
1845 : ! **************************************************************************************************
1846 : !> \brief computes the the solution to A*b=A_general using lu decomposition
1847 : !> pay attention, both matrices are overwritten, a_general contais the result
1848 : !> \param matrix_a ...
1849 : !> \param general_a ...
1850 : !> \author Florian Schiffmann
1851 : ! **************************************************************************************************
1852 5666 : SUBROUTINE cp_fm_solve(matrix_a, general_a)
1853 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
1854 :
1855 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
1856 :
1857 : INTEGER :: handle, info, n, nrhs
1858 5666 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1859 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, a_general
1860 : #if defined(__parallel)
1861 : INTEGER, DIMENSION(9) :: desca, descb
1862 : #else
1863 : INTEGER :: lda, ldb
1864 : #endif
1865 :
1866 5666 : CALL timeset(routineN, handle)
1867 :
1868 5666 : a => matrix_a%local_data
1869 5666 : a_general => general_a%local_data
1870 5666 : n = matrix_a%matrix_struct%nrow_global
1871 5666 : nrhs = general_a%matrix_struct%ncol_global
1872 16998 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1873 :
1874 : #if defined(__parallel)
1875 56660 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1876 56660 : descb(:) = general_a%matrix_struct%descriptor(:)
1877 5666 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1878 : CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
1879 5666 : 1, 1, descb, info)
1880 :
1881 : #else
1882 : lda = SIZE(a, 1)
1883 : ldb = SIZE(a_general, 1)
1884 : CALL dgetrf(n, n, a, lda, ipivot, info)
1885 : CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
1886 :
1887 : #endif
1888 : ! info is allowed to be zero
1889 : ! this does just signal a zero diagonal element
1890 5666 : DEALLOCATE (ipivot)
1891 5666 : CALL timestop(handle)
1892 5666 : END SUBROUTINE
1893 :
1894 : ! **************************************************************************************************
1895 : !> \brief Convenience function. Computes the matrix multiplications needed
1896 : !> for the multiplication of complex matrices.
1897 : !> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
1898 : !> \param transa : 'N' -> normal 'T' -> transpose
1899 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
1900 : !> \param transb ...
1901 : !> \param m ...
1902 : !> \param n ...
1903 : !> \param k ...
1904 : !> \param alpha ...
1905 : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
1906 : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
1907 : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
1908 : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
1909 : !> \param beta ...
1910 : !> \param C_re m x n matrix, real part
1911 : !> \param C_im m x n matrix, imaginary part
1912 : !> \param a_first_col ...
1913 : !> \param a_first_row ...
1914 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
1915 : !> \param b_first_row ...
1916 : !> \param c_first_col ...
1917 : !> \param c_first_row ...
1918 : !> \author Samuel Andermatt
1919 : !> \note
1920 : !> C should have no overlap with A, B
1921 : ! **************************************************************************************************
1922 0 : SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
1923 : C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
1924 : c_first_row)
1925 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
1926 : INTEGER, INTENT(IN) :: m, n, k
1927 : REAL(KIND=dp), INTENT(IN) :: alpha
1928 : TYPE(cp_fm_type), INTENT(IN) :: A_re, A_im, B_re, B_im
1929 : REAL(KIND=dp), INTENT(IN) :: beta
1930 : TYPE(cp_fm_type), INTENT(IN) :: C_re, C_im
1931 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
1932 : b_first_row, c_first_col, c_first_row
1933 :
1934 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
1935 :
1936 : INTEGER :: handle
1937 :
1938 0 : CALL timeset(routineN, handle)
1939 :
1940 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
1941 : a_first_col=a_first_col, &
1942 : a_first_row=a_first_row, &
1943 : b_first_col=b_first_col, &
1944 : b_first_row=b_first_row, &
1945 : c_first_col=c_first_col, &
1946 0 : c_first_row=c_first_row)
1947 : CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
1948 : a_first_col=a_first_col, &
1949 : a_first_row=a_first_row, &
1950 : b_first_col=b_first_col, &
1951 : b_first_row=b_first_row, &
1952 : c_first_col=c_first_col, &
1953 0 : c_first_row=c_first_row)
1954 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
1955 : a_first_col=a_first_col, &
1956 : a_first_row=a_first_row, &
1957 : b_first_col=b_first_col, &
1958 : b_first_row=b_first_row, &
1959 : c_first_col=c_first_col, &
1960 0 : c_first_row=c_first_row)
1961 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
1962 : a_first_col=a_first_col, &
1963 : a_first_row=a_first_row, &
1964 : b_first_col=b_first_col, &
1965 : b_first_row=b_first_row, &
1966 : c_first_col=c_first_col, &
1967 0 : c_first_row=c_first_row)
1968 :
1969 0 : CALL timestop(handle)
1970 :
1971 0 : END SUBROUTINE cp_complex_fm_gemm
1972 :
1973 : ! **************************************************************************************************
1974 : !> \brief inverts a matrix using LU decomposition
1975 : !> the input matrix will be overwritten
1976 : !> \param matrix : input a general square non-singular matrix, outputs its inverse
1977 : !> \param info_out : optional, if present outputs the info from (p)zgetri
1978 : !> \author Lianheng Tong
1979 : ! **************************************************************************************************
1980 0 : SUBROUTINE cp_fm_lu_invert(matrix, info_out)
1981 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1982 : INTEGER, INTENT(OUT), OPTIONAL :: info_out
1983 :
1984 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
1985 :
1986 : INTEGER :: nrows_global, handle, info, lwork
1987 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
1988 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat
1989 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: mat_sp
1990 0 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
1991 0 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
1992 : #if defined(__parallel)
1993 : INTEGER :: liwork
1994 : INTEGER, DIMENSION(9) :: desca
1995 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
1996 : #else
1997 : INTEGER :: lda
1998 : #endif
1999 :
2000 0 : CALL timeset(routineN, handle)
2001 :
2002 0 : mat => matrix%local_data
2003 0 : mat_sp => matrix%local_data_sp
2004 0 : nrows_global = matrix%matrix_struct%nrow_global
2005 0 : CPASSERT(nrows_global .EQ. matrix%matrix_struct%ncol_global)
2006 0 : ALLOCATE (ipivot(nrows_global))
2007 : ! do LU decomposition
2008 : #if defined(__parallel)
2009 0 : desca = matrix%matrix_struct%descriptor
2010 0 : IF (matrix%use_sp) THEN
2011 : CALL psgetrf(nrows_global, nrows_global, &
2012 0 : mat_sp, 1, 1, desca, ipivot, info)
2013 : ELSE
2014 : CALL pdgetrf(nrows_global, nrows_global, &
2015 0 : mat, 1, 1, desca, ipivot, info)
2016 : END IF
2017 : #else
2018 : lda = SIZE(mat, 1)
2019 : IF (matrix%use_sp) THEN
2020 : CALL sgetrf(nrows_global, nrows_global, &
2021 : mat_sp, lda, ipivot, info)
2022 : ELSE
2023 : CALL dgetrf(nrows_global, nrows_global, &
2024 : mat, lda, ipivot, info)
2025 : END IF
2026 : #endif
2027 0 : IF (info /= 0) THEN
2028 0 : CALL cp_abort(__LOCATION__, "LU decomposition has failed")
2029 : END IF
2030 : ! do inversion
2031 0 : IF (matrix%use_sp) THEN
2032 0 : ALLOCATE (work(1))
2033 : ELSE
2034 0 : ALLOCATE (work_sp(1))
2035 : END IF
2036 : #if defined(__parallel)
2037 0 : ALLOCATE (iwork(1))
2038 0 : IF (matrix%use_sp) THEN
2039 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2040 0 : ipivot, work_sp, -1, iwork, -1, info)
2041 0 : lwork = INT(work_sp(1))
2042 0 : DEALLOCATE (work_sp)
2043 0 : ALLOCATE (work_sp(lwork))
2044 : ELSE
2045 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2046 0 : ipivot, work, -1, iwork, -1, info)
2047 0 : lwork = INT(work(1))
2048 0 : DEALLOCATE (work)
2049 0 : ALLOCATE (work(lwork))
2050 : END IF
2051 0 : liwork = INT(iwork(1))
2052 0 : DEALLOCATE (iwork)
2053 0 : ALLOCATE (iwork(liwork))
2054 0 : IF (matrix%use_sp) THEN
2055 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2056 0 : ipivot, work_sp, lwork, iwork, liwork, info)
2057 : ELSE
2058 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2059 0 : ipivot, work, lwork, iwork, liwork, info)
2060 : END IF
2061 0 : DEALLOCATE (iwork)
2062 : #else
2063 : IF (matrix%use_sp) THEN
2064 : CALL sgetri(nrows_global, mat_sp, lda, &
2065 : ipivot, work_sp, -1, info)
2066 : lwork = INT(work_sp(1))
2067 : DEALLOCATE (work_sp)
2068 : ALLOCATE (work_sp(lwork))
2069 : CALL sgetri(nrows_global, mat_sp, lda, &
2070 : ipivot, work_sp, lwork, info)
2071 : ELSE
2072 : CALL dgetri(nrows_global, mat, lda, &
2073 : ipivot, work, -1, info)
2074 : lwork = INT(work(1))
2075 : DEALLOCATE (work)
2076 : ALLOCATE (work(lwork))
2077 : CALL dgetri(nrows_global, mat, lda, &
2078 : ipivot, work, lwork, info)
2079 : END IF
2080 : #endif
2081 0 : IF (matrix%use_sp) THEN
2082 0 : DEALLOCATE (work_sp)
2083 : ELSE
2084 0 : DEALLOCATE (work)
2085 : END IF
2086 0 : DEALLOCATE (ipivot)
2087 :
2088 0 : IF (PRESENT(info_out)) THEN
2089 0 : info_out = info
2090 : ELSE
2091 0 : IF (info /= 0) &
2092 0 : CALL cp_abort(__LOCATION__, "LU inversion has failed")
2093 : END IF
2094 :
2095 0 : CALL timestop(handle)
2096 :
2097 0 : END SUBROUTINE cp_fm_lu_invert
2098 :
2099 : ! **************************************************************************************************
2100 : !> \brief norm of matrix using (p)dlange
2101 : !> \param matrix : input a general matrix
2102 : !> \param mode : 'M' max abs element value,
2103 : !> '1' or 'O' one norm, i.e. maximum column sum
2104 : !> 'I' infinity norm, i.e. maximum row sum
2105 : !> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2106 : !> \return : the norm according to mode
2107 : !> \author Lianheng Tong
2108 : ! **************************************************************************************************
2109 492 : FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2110 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2111 : CHARACTER, INTENT(IN) :: mode
2112 : REAL(KIND=dp) :: res
2113 :
2114 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
2115 :
2116 : INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2117 : REAL(KIND=sp) :: res_sp
2118 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2119 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2120 492 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2121 492 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2122 : #if defined(__parallel)
2123 : INTEGER, DIMENSION(9) :: desca
2124 : #else
2125 : INTEGER :: lda
2126 : #endif
2127 :
2128 492 : CALL timeset(routineN, handle)
2129 :
2130 : CALL cp_fm_get_info(matrix=matrix, &
2131 : nrow_global=nrows, &
2132 : ncol_global=ncols, &
2133 : nrow_local=nrows_local, &
2134 492 : ncol_local=ncols_local)
2135 492 : aa => matrix%local_data
2136 492 : aa_sp => matrix%local_data_sp
2137 :
2138 : #if defined(__parallel)
2139 4920 : desca = matrix%matrix_struct%descriptor
2140 : SELECT CASE (mode)
2141 : CASE ('M', 'm')
2142 492 : lwork = 1
2143 : CASE ('1', 'O', 'o')
2144 492 : lwork = ncols_local
2145 : CASE ('I', 'i')
2146 0 : lwork = nrows_local
2147 : CASE ('F', 'f', 'E', 'e')
2148 0 : lwork = 1
2149 : CASE DEFAULT
2150 492 : CPABORT("mode input is not valid")
2151 : END SELECT
2152 492 : IF (matrix%use_sp) THEN
2153 0 : ALLOCATE (work_sp(lwork))
2154 0 : res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2155 0 : DEALLOCATE (work_sp)
2156 0 : res = REAL(res_sp, KIND=dp)
2157 : ELSE
2158 1476 : ALLOCATE (work(lwork))
2159 492 : res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2160 492 : DEALLOCATE (work)
2161 : END IF
2162 : #else
2163 : SELECT CASE (mode)
2164 : CASE ('M', 'm')
2165 : lwork = 1
2166 : CASE ('1', 'O', 'o')
2167 : lwork = 1
2168 : CASE ('I', 'i')
2169 : lwork = nrows
2170 : CASE ('F', 'f', 'E', 'e')
2171 : lwork = 1
2172 : CASE DEFAULT
2173 : CPABORT("mode input is not valid")
2174 : END SELECT
2175 : IF (matrix%use_sp) THEN
2176 : ALLOCATE (work_sp(lwork))
2177 : lda = SIZE(aa_sp, 1)
2178 : res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2179 : DEALLOCATE (work_sp)
2180 : res = REAL(res_sp, KIND=dp)
2181 : ELSE
2182 : ALLOCATE (work(lwork))
2183 : lda = SIZE(aa, 1)
2184 : res = dlange(mode, nrows, ncols, aa, lda, work)
2185 : DEALLOCATE (work)
2186 : END IF
2187 : #endif
2188 :
2189 492 : CALL timestop(handle)
2190 :
2191 492 : END FUNCTION cp_fm_norm
2192 :
2193 : ! **************************************************************************************************
2194 : !> \brief trace of a matrix using pdlatra
2195 : !> \param matrix : input a square matrix
2196 : !> \return : the trace
2197 : !> \author Lianheng Tong
2198 : ! **************************************************************************************************
2199 0 : FUNCTION cp_fm_latra(matrix) RESULT(res)
2200 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2201 : REAL(KIND=dp) :: res
2202 :
2203 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
2204 :
2205 : INTEGER :: nrows, ncols, handle
2206 : REAL(KIND=sp) :: res_sp
2207 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2208 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2209 : #if defined(__parallel)
2210 : INTEGER, DIMENSION(9) :: desca
2211 : #else
2212 : INTEGER :: ii
2213 : #endif
2214 :
2215 0 : CALL timeset(routineN, handle)
2216 :
2217 0 : nrows = matrix%matrix_struct%nrow_global
2218 0 : ncols = matrix%matrix_struct%ncol_global
2219 0 : CPASSERT(nrows .EQ. ncols)
2220 0 : aa => matrix%local_data
2221 0 : aa_sp => matrix%local_data_sp
2222 :
2223 : #if defined(__parallel)
2224 0 : desca = matrix%matrix_struct%descriptor
2225 0 : IF (matrix%use_sp) THEN
2226 0 : res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2227 0 : res = REAL(res_sp, KIND=dp)
2228 : ELSE
2229 0 : res = pdlatra(nrows, aa, 1, 1, desca)
2230 : END IF
2231 : #else
2232 : IF (matrix%use_sp) THEN
2233 : res_sp = 0.0_sp
2234 : DO ii = 1, nrows
2235 : res_sp = res_sp + aa_sp(ii, ii)
2236 : END DO
2237 : res = REAL(res_sp, KIND=dp)
2238 : ELSE
2239 : res = 0.0_dp
2240 : DO ii = 1, nrows
2241 : res = res + aa(ii, ii)
2242 : END DO
2243 : END IF
2244 : #endif
2245 :
2246 0 : CALL timestop(handle)
2247 :
2248 0 : END FUNCTION cp_fm_latra
2249 :
2250 : ! **************************************************************************************************
2251 : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2252 : !> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2253 : !> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2254 : !> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2255 : !> \param nrow ...
2256 : !> \param ncol ...
2257 : !> \param first_row ...
2258 : !> \param first_col ...
2259 : !> \author MI
2260 : ! **************************************************************************************************
2261 36 : SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2262 :
2263 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2264 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2265 : INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2266 :
2267 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
2268 :
2269 : INTEGER :: handle
2270 : INTEGER :: info, lwork
2271 36 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2272 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2273 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2274 : #if defined(__parallel)
2275 : INTEGER, DIMENSION(9) :: descc
2276 : #else
2277 : INTEGER :: lda
2278 : #endif
2279 :
2280 36 : CALL timeset(routineN, handle)
2281 :
2282 36 : a => matrix%local_data
2283 36 : lwork = -1
2284 108 : ALLOCATE (work(2*nrow))
2285 108 : ALLOCATE (ipiv(ncol))
2286 36 : info = 0
2287 :
2288 : #if defined(__parallel)
2289 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2290 : ! Call SCALAPACK routine to get optimal work dimension
2291 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2292 36 : lwork = INT(work(1))
2293 36 : DEALLOCATE (work)
2294 108 : ALLOCATE (work(lwork))
2295 244 : tau = 0.0_dp
2296 354 : ipiv = 0
2297 :
2298 : ! Call SCALAPACK routine to get QR decomposition of CTs
2299 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2300 : #else
2301 : CPASSERT(first_row == 1 .AND. first_col == 1)
2302 : lda = SIZE(a, 1)
2303 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2304 : lwork = INT(work(1))
2305 : DEALLOCATE (work)
2306 : ALLOCATE (work(lwork))
2307 : tau = 0.0_dp
2308 : ipiv = 0
2309 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2310 : #endif
2311 36 : CPASSERT(info == 0)
2312 :
2313 36 : DEALLOCATE (work)
2314 36 : DEALLOCATE (ipiv)
2315 :
2316 36 : CALL timestop(handle)
2317 :
2318 36 : END SUBROUTINE cp_fm_pdgeqpf
2319 :
2320 : ! **************************************************************************************************
2321 : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2322 : !> with orthonormal columns, which is defined as the first N columns of a product of K
2323 : !> elementary reflectors of order M
2324 : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2325 : !> as returned from PDGEQRF
2326 : !> On exit it contains the M-by-N distributed matrix Q
2327 : !> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2328 : !> \param nrow ...
2329 : !> \param first_row ...
2330 : !> \param first_col ...
2331 : !> \author MI
2332 : ! **************************************************************************************************
2333 36 : SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2334 :
2335 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2336 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2337 : INTEGER, INTENT(IN) :: nrow, first_row, first_col
2338 :
2339 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
2340 :
2341 : INTEGER :: handle
2342 : INTEGER :: info, lwork
2343 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2344 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2345 : #if defined(__parallel)
2346 : INTEGER, DIMENSION(9) :: descc
2347 : #else
2348 : INTEGER :: lda
2349 : #endif
2350 :
2351 36 : CALL timeset(routineN, handle)
2352 :
2353 36 : a => matrix%local_data
2354 36 : lwork = -1
2355 108 : ALLOCATE (work(2*nrow))
2356 36 : info = 0
2357 :
2358 : #if defined(__parallel)
2359 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2360 :
2361 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2362 36 : CPASSERT(info == 0)
2363 36 : lwork = INT(work(1))
2364 36 : DEALLOCATE (work)
2365 108 : ALLOCATE (work(lwork))
2366 :
2367 : ! Call SCALAPACK routine to get Q
2368 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2369 : #else
2370 : CPASSERT(first_row == 1 .AND. first_col == 1)
2371 : lda = SIZE(a, 1)
2372 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2373 : lwork = INT(work(1))
2374 : DEALLOCATE (work)
2375 : ALLOCATE (work(lwork))
2376 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2377 : #endif
2378 36 : CPASSERT(INFO == 0)
2379 :
2380 36 : DEALLOCATE (work)
2381 36 : CALL timestop(handle)
2382 :
2383 36 : END SUBROUTINE cp_fm_pdorgqr
2384 :
2385 : ! **************************************************************************************************
2386 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2387 : !> \param cs cosine of the rotation angle
2388 : !> \param sn sinus of the rotation angle
2389 : !> \param irow ...
2390 : !> \param jrow ...
2391 : !> \author Ole Schuett
2392 : ! **************************************************************************************************
2393 543328 : SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2394 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2395 : INTEGER, INTENT(IN) :: irow, jrow
2396 : REAL(dp), INTENT(IN) :: cs, sn
2397 :
2398 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
2399 : INTEGER :: handle, nrow, ncol
2400 :
2401 : #if defined(__parallel)
2402 : INTEGER :: info, lwork
2403 : INTEGER, DIMENSION(9) :: desc
2404 543328 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2405 : #endif
2406 :
2407 543328 : CALL timeset(routineN, handle)
2408 543328 : CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2409 :
2410 : #if defined(__parallel)
2411 543328 : lwork = 2*ncol + 1
2412 1629984 : ALLOCATE (work(lwork))
2413 5433280 : desc(:) = matrix%matrix_struct%descriptor(:)
2414 : CALL pdrot(ncol, &
2415 : matrix%local_data(1, 1), irow, 1, desc, ncol, &
2416 : matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2417 543328 : cs, sn, work, lwork, info)
2418 543328 : CPASSERT(info == 0)
2419 543328 : DEALLOCATE (work)
2420 : #else
2421 : CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2422 : #endif
2423 :
2424 543328 : CALL timestop(handle)
2425 543328 : END SUBROUTINE cp_fm_rot_rows
2426 :
2427 : ! **************************************************************************************************
2428 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2429 : !> \param cs cosine of the rotation angle
2430 : !> \param sn sinus of the rotation angle
2431 : !> \param icol ...
2432 : !> \param jcol ...
2433 : !> \author Ole Schuett
2434 : ! **************************************************************************************************
2435 612158 : SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2436 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2437 : INTEGER, INTENT(IN) :: icol, jcol
2438 : REAL(dp), INTENT(IN) :: cs, sn
2439 :
2440 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
2441 : INTEGER :: handle, nrow, ncol
2442 :
2443 : #if defined(__parallel)
2444 : INTEGER :: info, lwork
2445 : INTEGER, DIMENSION(9) :: desc
2446 612158 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2447 : #endif
2448 :
2449 612158 : CALL timeset(routineN, handle)
2450 612158 : CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2451 :
2452 : #if defined(__parallel)
2453 612158 : lwork = 2*nrow + 1
2454 1836474 : ALLOCATE (work(lwork))
2455 6121580 : desc(:) = matrix%matrix_struct%descriptor(:)
2456 : CALL pdrot(nrow, &
2457 : matrix%local_data(1, 1), 1, icol, desc, 1, &
2458 : matrix%local_data(1, 1), 1, jcol, desc, 1, &
2459 612158 : cs, sn, work, lwork, info)
2460 612158 : CPASSERT(info == 0)
2461 612158 : DEALLOCATE (work)
2462 : #else
2463 : CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2464 : #endif
2465 :
2466 612158 : CALL timestop(handle)
2467 612158 : END SUBROUTINE cp_fm_rot_cols
2468 :
2469 : ! **************************************************************************************************
2470 : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2471 : !> \param matrix_a ...
2472 : !> \param B ...
2473 : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2474 : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2475 : !> \param start_row starting index of rows, optional, defaults to 1
2476 : !> \param start_col starting index of columns, optional, defaults to 1
2477 : !> \param do_norm ...
2478 : !> \param do_print ...
2479 : ! **************************************************************************************************
2480 0 : SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2481 : do_norm, do_print)
2482 :
2483 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2484 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: B
2485 : INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2486 : LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2487 :
2488 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
2489 :
2490 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2491 : j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2492 : start_col_local, start_row_global, start_row_local, this_col, unit_nr
2493 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2494 : LOGICAL :: my_do_norm, my_do_print
2495 : REAL(KIND=dp) :: norm
2496 0 : REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2497 :
2498 0 : CALL timeset(routineN, handle)
2499 :
2500 0 : my_do_norm = .TRUE.
2501 0 : IF (PRESENT(do_norm)) my_do_norm = do_norm
2502 :
2503 0 : my_do_print = .FALSE.
2504 0 : IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2505 :
2506 0 : unit_nr = -1
2507 0 : IF (my_do_print) THEN
2508 0 : unit_nr = cp_logger_get_default_unit_nr()
2509 0 : IF (unit_nr < 1) my_do_print = .FALSE.
2510 : END IF
2511 :
2512 0 : IF (SIZE(B) /= 0) THEN
2513 0 : IF (PRESENT(nrows)) THEN
2514 0 : nrow_global = nrows
2515 : ELSE
2516 0 : nrow_global = SIZE(B, 1)
2517 : END IF
2518 :
2519 0 : IF (PRESENT(ncols)) THEN
2520 0 : ncol_global = ncols
2521 : ELSE
2522 0 : ncol_global = SIZE(B, 2)
2523 : END IF
2524 :
2525 0 : IF (PRESENT(start_row)) THEN
2526 0 : start_row_global = start_row
2527 : ELSE
2528 : start_row_global = 1
2529 : END IF
2530 :
2531 0 : IF (PRESENT(start_col)) THEN
2532 0 : start_col_global = start_col
2533 : ELSE
2534 : start_col_global = 1
2535 : END IF
2536 :
2537 0 : end_row_global = start_row_global + nrow_global - 1
2538 0 : end_col_global = start_col_global + ncol_global - 1
2539 :
2540 : CALL cp_fm_get_info(matrix=matrix_a, &
2541 : nrow_global=nrow_global, ncol_global=ncol_global, &
2542 : nrow_local=nrow_local, ncol_local=ncol_local, &
2543 0 : row_indices=row_indices, col_indices=col_indices)
2544 0 : IF (end_row_global > nrow_global) THEN
2545 : end_row_global = nrow_global
2546 : END IF
2547 0 : IF (end_col_global > ncol_global) THEN
2548 : end_col_global = ncol_global
2549 : END IF
2550 :
2551 : ! find out row/column indices of locally stored matrix elements that
2552 : ! needs to be copied.
2553 : ! Arrays row_indices and col_indices are assumed to be sorted in
2554 : ! ascending order
2555 0 : DO start_row_local = 1, nrow_local
2556 0 : IF (row_indices(start_row_local) >= start_row_global) EXIT
2557 : END DO
2558 :
2559 0 : DO end_row_local = start_row_local, nrow_local
2560 0 : IF (row_indices(end_row_local) > end_row_global) EXIT
2561 : END DO
2562 0 : end_row_local = end_row_local - 1
2563 :
2564 0 : DO start_col_local = 1, ncol_local
2565 0 : IF (col_indices(start_col_local) >= start_col_global) EXIT
2566 : END DO
2567 :
2568 0 : DO end_col_local = start_col_local, ncol_local
2569 0 : IF (col_indices(end_col_local) > end_col_global) EXIT
2570 : END DO
2571 0 : end_col_local = end_col_local - 1
2572 :
2573 0 : a => matrix_a%local_data
2574 :
2575 0 : this_col = col_indices(start_col_local) - start_col_global + 1
2576 :
2577 0 : B(:, this_col) = a(:, start_col_local)
2578 :
2579 0 : IF (my_do_norm) THEN
2580 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2581 0 : B(:, this_col) = B(:, this_col)/norm
2582 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2583 : END IF
2584 :
2585 0 : DO i = start_col_local + 1, end_col_local
2586 0 : this_col = col_indices(i) - start_col_global + 1
2587 0 : B(:, this_col) = a(:, i)
2588 0 : DO j = start_col_local, i - 1
2589 0 : j_col = col_indices(j) - start_col_global + 1
2590 : B(:, this_col) = B(:, this_col) - &
2591 : accurate_dot_product(B(:, j_col), B(:, this_col))* &
2592 0 : B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
2593 : END DO
2594 :
2595 0 : IF (my_do_norm) THEN
2596 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2597 0 : B(:, this_col) = B(:, this_col)/norm
2598 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2599 : END IF
2600 :
2601 : END DO
2602 0 : CALL matrix_a%matrix_struct%para_env%sum(B)
2603 : END IF
2604 :
2605 0 : CALL timestop(handle)
2606 :
2607 0 : END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
2608 :
2609 : ! **************************************************************************************************
2610 : !> \brief Cholesky decomposition
2611 : !> \param fm_matrix ...
2612 : !> \param n ...
2613 : ! **************************************************************************************************
2614 10015 : SUBROUTINE cp_fm_potrf(fm_matrix, n)
2615 : TYPE(cp_fm_type) :: fm_matrix
2616 : INTEGER, INTENT(in) :: n
2617 :
2618 : INTEGER :: info
2619 10015 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2620 10015 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2621 : #if defined(__parallel)
2622 : INTEGER, DIMENSION(9) :: desca
2623 : #endif
2624 :
2625 10015 : a => fm_matrix%local_data
2626 10015 : a_sp => fm_matrix%local_data_sp
2627 : #if defined(__parallel)
2628 100150 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2629 10015 : IF (fm_matrix%use_sp) THEN
2630 0 : CALL pspotrf('U', n, a_sp(1, 1), 1, 1, desca, info)
2631 : ELSE
2632 10015 : CALL pdpotrf('U', n, a(1, 1), 1, 1, desca, info)
2633 : END IF
2634 : #else
2635 : IF (fm_matrix%use_sp) THEN
2636 : CALL spotrf('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
2637 : ELSE
2638 : CALL dpotrf('U', n, a(1, 1), SIZE(a, 1), info)
2639 : END IF
2640 : #endif
2641 10015 : IF (info /= 0) &
2642 0 : CPABORT("Cholesky decomposition failed. Matrix ill conditioned ?")
2643 :
2644 10015 : END SUBROUTINE cp_fm_potrf
2645 :
2646 : ! **************************************************************************************************
2647 : !> \brief Invert trianguar matrix
2648 : !> \param fm_matrix the matrix to invert (must be an upper triangular matrix)
2649 : !> \param n size of the matrix to invert
2650 : ! **************************************************************************************************
2651 9239 : SUBROUTINE cp_fm_potri(fm_matrix, n)
2652 : TYPE(cp_fm_type) :: fm_matrix
2653 : INTEGER, INTENT(in) :: n
2654 :
2655 9239 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2656 9239 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2657 : INTEGER :: info
2658 : #if defined(__parallel)
2659 : INTEGER, DIMENSION(9) :: desca
2660 : #endif
2661 :
2662 9239 : a => fm_matrix%local_data
2663 9239 : a_sp => fm_matrix%local_data_sp
2664 : #if defined(__parallel)
2665 92390 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2666 9239 : IF (fm_matrix%use_sp) THEN
2667 0 : CALL pspotri('U', n, a_sp(1, 1), 1, 1, desca, info)
2668 : ELSE
2669 9239 : CALL pdpotri('U', n, a(1, 1), 1, 1, desca, info)
2670 : END IF
2671 : #else
2672 : IF (fm_matrix%use_sp) THEN
2673 : CALL spotri('U', n, a_sp(1, 1), SIZE(a_sp, 1), info)
2674 : ELSE
2675 : CALL dpotri('U', n, a(1, 1), SIZE(a, 1), info)
2676 : END IF
2677 : #endif
2678 9239 : CPASSERT(info == 0)
2679 9239 : END SUBROUTINE cp_fm_potri
2680 :
2681 : ! **************************************************************************************************
2682 : !> \brief ...
2683 : !> \param fm_matrix ...
2684 : !> \param neig ...
2685 : !> \param fm_matrixb ...
2686 : !> \param fm_matrixout ...
2687 : !> \param op ...
2688 : !> \param pos ...
2689 : !> \param transa ...
2690 : ! **************************************************************************************************
2691 1184 : SUBROUTINE cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
2692 : TYPE(cp_fm_type) :: fm_matrix
2693 : TYPE(cp_fm_type) :: fm_matrixb
2694 : TYPE(cp_fm_type) :: fm_matrixout
2695 : INTEGER, INTENT(IN) :: neig
2696 : CHARACTER(LEN=*), INTENT(IN) :: op
2697 : CHARACTER(LEN=*), INTENT(IN) :: pos
2698 : CHARACTER(LEN=*), INTENT(IN) :: transa
2699 :
2700 1184 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, outm
2701 1184 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, outm_sp
2702 : INTEGER :: n, itype
2703 : REAL(KIND=dp) :: alpha
2704 : #if defined(__parallel)
2705 : INTEGER :: i
2706 : INTEGER, DIMENSION(9) :: desca, descb, descout
2707 : #endif
2708 :
2709 : ! notice b is the cholesky guy
2710 1184 : a => fm_matrix%local_data
2711 1184 : b => fm_matrixb%local_data
2712 1184 : outm => fm_matrixout%local_data
2713 1184 : a_sp => fm_matrix%local_data_sp
2714 1184 : b_sp => fm_matrixb%local_data_sp
2715 1184 : outm_sp => fm_matrixout%local_data_sp
2716 :
2717 1184 : n = fm_matrix%matrix_struct%nrow_global
2718 1184 : itype = 1
2719 :
2720 : #if defined(__parallel)
2721 11840 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2722 11840 : descb(:) = fm_matrixb%matrix_struct%descriptor(:)
2723 11840 : descout(:) = fm_matrixout%matrix_struct%descriptor(:)
2724 1184 : alpha = 1.0_dp
2725 5316 : DO i = 1, neig
2726 5316 : IF (fm_matrix%use_sp) THEN
2727 0 : CALL pscopy(n, a_sp(1, 1), 1, i, desca, 1, outm_sp(1, 1), 1, i, descout, 1)
2728 : ELSE
2729 4132 : CALL pdcopy(n, a(1, 1), 1, i, desca, 1, outm(1, 1), 1, i, descout, 1)
2730 : END IF
2731 : END DO
2732 1184 : IF (op .EQ. "SOLVE") THEN
2733 1184 : IF (fm_matrix%use_sp) THEN
2734 : CALL pstrsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
2735 0 : outm_sp(1, 1), 1, 1, descout)
2736 : ELSE
2737 1184 : CALL pdtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
2738 : END IF
2739 : ELSE
2740 0 : IF (fm_matrix%use_sp) THEN
2741 : CALL pstrmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
2742 0 : outm_sp(1, 1), 1, 1, descout)
2743 : ELSE
2744 0 : CALL pdtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
2745 : END IF
2746 : END IF
2747 : #else
2748 : alpha = 1.0_dp
2749 : IF (fm_matrix%use_sp) THEN
2750 : CALL scopy(neig*n, a_sp(1, 1), 1, outm_sp(1, 1), 1)
2751 : ELSE
2752 : CALL dcopy(neig*n, a(1, 1), 1, outm(1, 1), 1)
2753 : END IF
2754 : IF (op .EQ. "SOLVE") THEN
2755 : IF (fm_matrix%use_sp) THEN
2756 : CALL strsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), SIZE(b_sp, 1), outm_sp(1, 1), n)
2757 : ELSE
2758 : CALL dtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), SIZE(b, 1), outm(1, 1), n)
2759 : END IF
2760 : ELSE
2761 : IF (fm_matrix%use_sp) THEN
2762 : CALL strmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), n, outm_sp(1, 1), n)
2763 : ELSE
2764 : CALL dtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), n, outm(1, 1), n)
2765 : END IF
2766 : END IF
2767 : #endif
2768 :
2769 1184 : END SUBROUTINE cp_fm_cholesky_restore
2770 :
2771 : ! **************************************************************************************************
2772 : !> \brief Calculates
2773 : !> yv = alpha*amat*xv + beta*yv
2774 : !> where amat: fm matrix
2775 : !> xv : vector replicated
2776 : !> yv : vector replicated
2777 : !> Defaults: alpha = 1, beta = 0
2778 : ! **************************************************************************************************
2779 16116 : SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
2780 : TYPE(cp_fm_type), INTENT(IN) :: amat
2781 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: xv
2782 : REAL(KIND=dp), DIMENSION(:), INTENT(INOUT) :: yv
2783 : REAL(KIND=dp), OPTIONAL, INTENT(IN) :: alpha, beta
2784 :
2785 : INTEGER :: na, nc, nx, ny
2786 : REAL(KIND=dp) :: aval, bval
2787 : #if defined(__parallel)
2788 : INTEGER :: nrl, ncl, ic, ir
2789 16116 : INTEGER, DIMENSION(:), POINTER :: rind, cind
2790 16116 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
2791 : #endif
2792 :
2793 16116 : IF (amat%use_sp) THEN
2794 0 : CPABORT("cp_fm_matvec: SP option not available")
2795 : END IF
2796 16116 : aval = 1.0_dp
2797 16116 : IF (PRESENT(alpha)) aval = alpha
2798 16116 : bval = 0.0_dp
2799 16116 : IF (PRESENT(beta)) bval = beta
2800 :
2801 16116 : CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
2802 16116 : nx = SIZE(xv)
2803 16116 : ny = SIZE(yv)
2804 16116 : IF ((nx /= ny) .OR. (nc /= nx)) THEN
2805 0 : CPABORT("cp_fm_matvec: incompatible dimensions")
2806 : END IF
2807 : #if defined(__parallel)
2808 : CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
2809 16116 : row_indices=rind, col_indices=cind)
2810 112812 : ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
2811 152576 : DO ic = 1, ncl
2812 152576 : xvl(ic) = xv(cind(ic))
2813 : END DO
2814 1786706 : yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
2815 152576 : yvm = 0.0_dp
2816 84346 : DO ir = 1, nrl
2817 84346 : yvm(rind(ir)) = yvl(ir)
2818 : END DO
2819 16116 : CALL amat%matrix_struct%para_env%sum(yvm)
2820 16116 : IF (bval == 0.0_dp) THEN
2821 76288 : yv = aval*yvm
2822 : ELSE
2823 76288 : yv = bval*yv + aval*yvm
2824 : END IF
2825 : #else
2826 : IF (bval == 0.0_dp) THEN
2827 : yv = aval*MATMUL(amat%local_data, xv)
2828 : ELSE
2829 : yv = bval*yv + aval*MATMUL(amat%local_data, xv)
2830 : END IF
2831 : #endif
2832 :
2833 64464 : END SUBROUTINE cp_fm_matvec
2834 :
2835 : END MODULE cp_fm_basic_linalg
|