Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief basic linear algebra operations for full matrices
10 : !> \par History
11 : !> 08.2002 split out of qs_blacs [fawzi]
12 : !> \author Fawzi Mohamed
13 : ! **************************************************************************************************
14 : MODULE cp_fm_basic_linalg
15 : USE cp_blacs_env, ONLY: cp_blacs_env_type
16 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
17 : USE cp_fm_types, ONLY: &
18 : cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
19 : cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
20 : cp_fm_type
21 : USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
22 : cp_to_string
23 : USE kahan_sum, ONLY: accurate_dot_product, &
24 : accurate_sum
25 : USE kinds, ONLY: dp, &
26 : int_8, &
27 : sp
28 : USE machine, ONLY: m_memory
29 : USE mathlib, ONLY: get_pseudo_inverse_svd, &
30 : invert_matrix
31 : USE message_passing, ONLY: mp_comm_type
32 : #include "../base/base_uses.f90"
33 :
34 : IMPLICIT NONE
35 : PRIVATE
36 :
37 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
38 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39 :
40 : PUBLIC :: cp_fm_scale, & ! scale a matrix
41 : cp_fm_scale_and_add, & ! scale and add two matrices
42 : cp_fm_geadd, & ! general addition
43 : cp_fm_column_scale, & ! scale columns of a matrix
44 : cp_fm_row_scale, & ! scale rows of a matrix
45 : cp_fm_trace, & ! trace of the transpose(A)*B
46 : cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
47 : cp_fm_norm, & ! different norms of A
48 : cp_fm_schur_product, & ! schur product
49 : cp_fm_transpose, & ! transpose a matrix
50 : cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
51 : cp_fm_syrk, & ! rank k update
52 : cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
53 : cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
54 : cp_fm_gemm, & ! multiply two matrices
55 : cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
56 : cp_fm_invert, & ! computes the inverse and determinant
57 : cp_fm_frobenius_norm, & ! frobenius norm
58 : cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
59 : cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
60 : cp_fm_solve, & ! solves the equation A*B=C A and C are input
61 : cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
62 : cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
63 : cp_fm_potrf, & ! Cholesky decomposition
64 : cp_fm_potri, & ! Invert triangular matrix
65 : cp_fm_rot_rows, & ! rotates two rows
66 : cp_fm_rot_cols, & ! rotates two columns
67 : cp_fm_cholesky_restore, & ! apply Cholesky decomposition
68 : cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
69 : cp_fm_det, & ! determinant of a real matrix with correct sign
70 : cp_fm_matvec ! matrix-vector multiplication (vector replicated)
71 :
72 : REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
73 : REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
74 :
75 : INTERFACE cp_fm_trace
76 : MODULE PROCEDURE cp_fm_trace_a0b0t0
77 : MODULE PROCEDURE cp_fm_trace_a1b0t1_a
78 : MODULE PROCEDURE cp_fm_trace_a1b0t1_p
79 : MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
80 : MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
81 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
82 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
83 : END INTERFACE cp_fm_trace
84 :
85 : INTERFACE cp_fm_contracted_trace
86 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
87 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
88 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
89 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
90 : END INTERFACE cp_fm_contracted_trace
91 : CONTAINS
92 :
93 : ! **************************************************************************************************
94 : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
95 : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
96 : ! **************************************************************************************************
97 0 : SUBROUTINE cp_fm_det(matrix_a, det_a)
98 :
99 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
100 : REAL(KIND=dp), INTENT(OUT) :: det_a
101 : REAL(KIND=dp) :: determinant
102 : TYPE(cp_fm_type) :: matrix_lu
103 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
104 : INTEGER :: n, i, info, P
105 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
106 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
107 :
108 : #if defined(__parallel)
109 : INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
110 : INTEGER, DIMENSION(9) :: desca
111 : #endif
112 :
113 : CALL cp_fm_create(matrix=matrix_lu, &
114 : matrix_struct=matrix_a%matrix_struct, &
115 0 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
116 0 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
117 :
118 0 : a => matrix_lu%local_data
119 0 : n = matrix_lu%matrix_struct%nrow_global
120 0 : ALLOCATE (ipivot(n))
121 0 : ipivot(:) = 0
122 0 : P = 0
123 0 : ALLOCATE (diag(n))
124 0 : diag(:) = 0.0_dp
125 : #if defined(__parallel)
126 : ! Use LU decomposition
127 0 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
128 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
129 0 : CALL cp_fm_get_diag(matrix_lu, diag)
130 0 : determinant = PRODUCT(diag)
131 0 : myprow = matrix_lu%matrix_struct%context%mepos(1)
132 0 : nprow = matrix_lu%matrix_struct%context%num_pe(1)
133 0 : npcol = matrix_lu%matrix_struct%context%num_pe(2)
134 0 : nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
135 0 : nrow_block = matrix_lu%matrix_struct%nrow_block
136 0 : DO irow_local = 1, nrow_local
137 0 : i = matrix_lu%matrix_struct%row_indices(irow_local)
138 0 : IF (ipivot(irow_local) /= i) P = P + 1
139 : END DO
140 0 : CALL matrix_lu%matrix_struct%para_env%sum(P)
141 : ! very important fix
142 0 : P = P/npcol
143 : #else
144 : CALL dgetrf(n, n, a, n, ipivot, info)
145 : CALL cp_fm_get_diag(matrix_lu, diag)
146 : determinant = PRODUCT(diag)
147 : DO i = 1, n
148 : IF (ipivot(i) /= i) P = P + 1
149 : END DO
150 : #endif
151 0 : DEALLOCATE (ipivot)
152 0 : DEALLOCATE (diag)
153 0 : CALL cp_fm_release(matrix_lu)
154 0 : det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
155 0 : END SUBROUTINE cp_fm_det
156 :
157 : ! **************************************************************************************************
158 : !> \brief calc A <- alpha*A + beta*B
159 : !> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
160 : !> scale A)
161 : !> \param alpha ...
162 : !> \param matrix_a ...
163 : !> \param beta ...
164 : !> \param matrix_b ...
165 : ! **************************************************************************************************
166 1062340 : SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
167 :
168 : REAL(KIND=dp), INTENT(IN) :: alpha
169 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
170 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: beta
171 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
172 :
173 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
174 :
175 : INTEGER :: handle, size_a, size_b
176 : REAL(KIND=dp) :: my_beta
177 1062340 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
178 1062340 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
179 :
180 1062340 : CALL timeset(routineN, handle)
181 :
182 1062340 : my_beta = 0.0_dp
183 1062340 : IF (PRESENT(matrix_b)) my_beta = 1.0_dp
184 1062340 : IF (PRESENT(beta)) my_beta = beta
185 1062340 : NULLIFY (a, b)
186 :
187 1062340 : IF (PRESENT(beta)) THEN
188 1062340 : CPASSERT(PRESENT(matrix_b))
189 1062340 : IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
190 0 : CPWARN("Bad use of routine. Call cp_fm_scale instead")
191 0 : CALL cp_fm_scale(alpha + beta, matrix_a)
192 0 : CALL timestop(handle)
193 0 : RETURN
194 : END IF
195 : END IF
196 :
197 1062340 : a => matrix_a%local_data
198 1062340 : a_sp => matrix_a%local_data_sp
199 :
200 1062340 : IF (matrix_a%use_sp) THEN
201 0 : size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
202 : ELSE
203 1062340 : size_a = SIZE(a, 1)*SIZE(a, 2)
204 : END IF
205 :
206 1062340 : IF (alpha .NE. 1.0_dp) THEN
207 78754 : IF (matrix_a%use_sp) THEN
208 0 : CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
209 : ELSE
210 78754 : CALL dscal(size_a, alpha, a, 1)
211 : END IF
212 : END IF
213 1062340 : IF (my_beta .NE. 0.0_dp) THEN
214 1054972 : IF (matrix_a%matrix_struct%context .NE. matrix_b%matrix_struct%context) &
215 0 : CPABORT("Matrices must be in the same blacs context")
216 :
217 1054972 : IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
218 : matrix_b%matrix_struct)) THEN
219 :
220 1054972 : b => matrix_b%local_data
221 1054972 : b_sp => matrix_b%local_data_sp
222 1054972 : IF (matrix_b%use_sp) THEN
223 0 : size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
224 : ELSE
225 1054972 : size_b = SIZE(b, 1)*SIZE(b, 2)
226 : END IF
227 1054972 : IF (size_a .NE. size_b) &
228 0 : CPABORT("Matrices must have same local sizes")
229 :
230 1054972 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
231 0 : CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
232 1054972 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
233 0 : CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
234 1054972 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
235 0 : CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
236 : ELSE
237 1054972 : CALL daxpy(size_a, my_beta, b, 1, a, 1)
238 : END IF
239 :
240 : ELSE
241 : #ifdef __parallel
242 0 : CPABORT("to do (pdscal,pdcopy,pdaxpy)")
243 : #else
244 : CPABORT("")
245 : #endif
246 : END IF
247 :
248 : END IF
249 :
250 1062340 : CALL timestop(handle)
251 :
252 1062340 : END SUBROUTINE cp_fm_scale_and_add
253 :
254 : ! **************************************************************************************************
255 : !> \brief interface to BLACS geadd:
256 : !> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
257 : !> where opt(matrix_a) can be either:
258 : !> 'N': matrix_a
259 : !> 'T': matrix_a^T
260 : !> 'C': matrix_a^H (Hermitian conjugate)
261 : !> note that this is a level three routine, use cp_fm_scale_and_add if that
262 : !> is sufficient for your needs
263 : !> \param alpha : complex scalar
264 : !> \param trans : 'N' normal, 'T' transposed
265 : !> \param matrix_a : input matrix_a
266 : !> \param beta : complex scalar
267 : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
268 : !> \author Lianheng Tong
269 : ! **************************************************************************************************
270 96 : SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
271 : REAL(KIND=dp), INTENT(IN) :: alpha, beta
272 : CHARACTER, INTENT(IN) :: trans
273 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
274 :
275 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
276 :
277 : INTEGER :: nrow_global, ncol_global, handle
278 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
279 : #if defined(__parallel)
280 : INTEGER, DIMENSION(9) :: desca, descb
281 : #elif !defined(__MKL)
282 : INTEGER :: ii, jj
283 : #endif
284 :
285 96 : CALL timeset(routineN, handle)
286 :
287 96 : nrow_global = matrix_a%matrix_struct%nrow_global
288 96 : ncol_global = matrix_a%matrix_struct%ncol_global
289 96 : CPASSERT(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
290 96 : CPASSERT(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
291 :
292 96 : aa => matrix_a%local_data
293 96 : bb => matrix_b%local_data
294 :
295 : #if defined(__parallel)
296 960 : desca = matrix_a%matrix_struct%descriptor
297 960 : descb = matrix_b%matrix_struct%descriptor
298 : CALL pdgeadd(trans, &
299 : nrow_global, &
300 : ncol_global, &
301 : alpha, &
302 : aa, &
303 : 1, 1, &
304 : desca, &
305 : beta, &
306 : bb, &
307 : 1, 1, &
308 96 : descb)
309 : #elif defined(__MKL)
310 : CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
311 : alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
312 : #else
313 : ! dgeadd is not a standard BLAS function, although it is implemented
314 : ! in some libraries like OpenBLAS, so not going to use it here
315 : SELECT CASE (trans)
316 : CASE ('T')
317 : DO jj = 1, ncol_global
318 : DO ii = 1, nrow_global
319 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
320 : END DO
321 : END DO
322 : CASE DEFAULT
323 : DO jj = 1, ncol_global
324 : DO ii = 1, nrow_global
325 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
326 : END DO
327 : END DO
328 : END SELECT
329 : #endif
330 :
331 96 : CALL timestop(handle)
332 :
333 96 : END SUBROUTINE cp_fm_geadd
334 :
335 : ! **************************************************************************************************
336 : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
337 : !> IMPORTANT : the sign of the determinant is not defined correctly yet ....
338 : !> \param matrix_a ...
339 : !> \param almost_determinant ...
340 : !> \param correct_sign ...
341 : !> \par History
342 : !> added correct_sign 02.07 (fschiff)
343 : !> \author Joost VandeVondele
344 : !> \note
345 : !> - matrix_a is overwritten
346 : !> - the sign of the determinant might be wrong
347 : !> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
348 : !> - one should be able to find out if ipivot is an even or an odd permutation...
349 : !> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
350 : !> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
351 : ! **************************************************************************************************
352 0 : SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
353 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
354 : REAL(KIND=dp), INTENT(OUT) :: almost_determinant
355 : LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
356 :
357 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
358 :
359 : INTEGER :: handle, i, info, n
360 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
361 : REAL(KIND=dp) :: determinant
362 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
363 : #if defined(__parallel)
364 : INTEGER, DIMENSION(9) :: desca
365 0 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
366 : #else
367 : INTEGER :: lda
368 : #endif
369 :
370 0 : CALL timeset(routineN, handle)
371 :
372 0 : a => matrix_a%local_data
373 0 : n = matrix_a%matrix_struct%nrow_global
374 0 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
375 :
376 : #if defined(__parallel)
377 : MARK_USED(correct_sign)
378 0 : desca(:) = matrix_a%matrix_struct%descriptor(:)
379 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
380 :
381 0 : ALLOCATE (diag(n))
382 0 : diag(:) = 0.0_dp
383 0 : CALL cp_fm_get_diag(matrix_a, diag)
384 0 : determinant = 1.0_dp
385 0 : DO i = 1, n
386 0 : determinant = determinant*diag(i)
387 : END DO
388 0 : DEALLOCATE (diag)
389 : #else
390 : lda = SIZE(a, 1)
391 : CALL dgetrf(n, n, a, lda, ipivot, info)
392 : determinant = 1.0_dp
393 : IF (correct_sign) THEN
394 : DO i = 1, n
395 : IF (ipivot(i) .NE. i) THEN
396 : determinant = -determinant*a(i, i)
397 : ELSE
398 : determinant = determinant*a(i, i)
399 : END IF
400 : END DO
401 : ELSE
402 : DO i = 1, n
403 : determinant = determinant*a(i, i)
404 : END DO
405 : END IF
406 : #endif
407 : ! info is allowed to be zero
408 : ! this does just signal a zero diagonal element
409 0 : DEALLOCATE (ipivot)
410 0 : almost_determinant = determinant ! notice that the sign is random
411 0 : CALL timestop(handle)
412 0 : END SUBROUTINE cp_fm_lu_decompose
413 :
414 : ! **************************************************************************************************
415 : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
416 : !> \param transa : 'N' -> normal 'T' -> transpose
417 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
418 : !> \param transb ...
419 : !> \param m ...
420 : !> \param n ...
421 : !> \param k ...
422 : !> \param alpha ...
423 : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
424 : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
425 : !> \param beta ...
426 : !> \param matrix_c : m x n matrix
427 : !> \param a_first_col ...
428 : !> \param a_first_row ...
429 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
430 : !> \param b_first_row ...
431 : !> \param c_first_col ...
432 : !> \param c_first_row ...
433 : !> \author Matthias Krack
434 : !> \note
435 : !> matrix_c should have no overlap with matrix_a, matrix_b
436 : ! **************************************************************************************************
437 550 : SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
438 : matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
439 : c_first_col, c_first_row)
440 :
441 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
442 : INTEGER, INTENT(IN) :: m, n, k
443 : REAL(KIND=dp), INTENT(IN) :: alpha
444 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
445 : REAL(KIND=dp), INTENT(IN) :: beta
446 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
447 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
448 : b_first_col, b_first_row, &
449 : c_first_col, c_first_row
450 :
451 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
452 :
453 : INTEGER :: handle, i_a, i_b, i_c, j_a, &
454 : j_b, j_c
455 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
456 550 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
457 : #if defined(__parallel)
458 : INTEGER, DIMENSION(9) :: desca, descb, descc
459 : #else
460 : INTEGER :: lda, ldb, ldc
461 : #endif
462 :
463 550 : CALL timeset(routineN, handle)
464 :
465 : !sample peak memory
466 550 : CALL m_memory()
467 :
468 550 : a => matrix_a%local_data
469 550 : b => matrix_b%local_data
470 550 : c => matrix_c%local_data
471 :
472 550 : a_sp => matrix_a%local_data_sp
473 550 : b_sp => matrix_b%local_data_sp
474 550 : c_sp => matrix_c%local_data_sp
475 :
476 550 : i_a = 1
477 550 : IF (PRESENT(a_first_row)) i_a = a_first_row
478 :
479 550 : j_a = 1
480 550 : IF (PRESENT(a_first_col)) j_a = a_first_col
481 :
482 550 : i_b = 1
483 550 : IF (PRESENT(b_first_row)) i_b = b_first_row
484 :
485 550 : j_b = 1
486 550 : IF (PRESENT(b_first_col)) j_b = b_first_col
487 :
488 550 : i_c = 1
489 550 : IF (PRESENT(c_first_row)) i_c = c_first_row
490 :
491 550 : j_c = 1
492 550 : IF (PRESENT(c_first_col)) j_c = c_first_col
493 :
494 : #if defined(__parallel)
495 :
496 5500 : desca(:) = matrix_a%matrix_struct%descriptor(:)
497 5500 : descb(:) = matrix_b%matrix_struct%descriptor(:)
498 5500 : descc(:) = matrix_c%matrix_struct%descriptor(:)
499 :
500 550 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
501 :
502 : CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
503 0 : descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
504 :
505 550 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
506 :
507 : CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
508 550 : descb, beta, c, i_c, j_c, descc)
509 :
510 : ELSE
511 0 : CPABORT("Mixed precision gemm NYI")
512 : END IF
513 : #else
514 :
515 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
516 :
517 : lda = SIZE(a_sp, 1)
518 : ldb = SIZE(b_sp, 1)
519 : ldc = SIZE(c_sp, 1)
520 :
521 : CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
522 : REAL(beta, sp), c_sp(i_c, j_c), ldc)
523 :
524 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
525 :
526 : lda = SIZE(a, 1)
527 : ldb = SIZE(b, 1)
528 : ldc = SIZE(c, 1)
529 :
530 : CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
531 :
532 : ELSE
533 : CPABORT("Mixed precision gemm NYI")
534 : END IF
535 :
536 : #endif
537 550 : CALL timestop(handle)
538 :
539 550 : END SUBROUTINE cp_fm_gemm
540 :
541 : ! **************************************************************************************************
542 : !> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
543 : !> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
544 : !> where matrix_a is symmetric
545 : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
546 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
547 : !> \param uplo triangular format
548 : !> \param m ...
549 : !> \param n ...
550 : !> \param alpha ...
551 : !> \param matrix_a : m x m matrix
552 : !> \param matrix_b : m x n matrix
553 : !> \param beta ...
554 : !> \param matrix_c : m x n matrix
555 : !> \author Matthias Krack
556 : !> \note
557 : !> matrix_c should have no overlap with matrix_a, matrix_b
558 : !> all matrices in QS are triangular according to uplo
559 : !> matrix_a is always an m x m matrix
560 : !> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
561 : ! **************************************************************************************************
562 146560 : SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
563 :
564 : CHARACTER(LEN=1), INTENT(IN) :: side, uplo
565 : INTEGER, INTENT(IN) :: m, n
566 : REAL(KIND=dp), INTENT(IN) :: alpha
567 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
568 : REAL(KIND=dp), INTENT(IN) :: beta
569 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
570 :
571 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
572 :
573 : INTEGER :: handle
574 146560 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
575 : #if defined(__parallel)
576 : INTEGER, DIMENSION(9) :: desca, descb, descc
577 : #else
578 : INTEGER :: lda, ldb, ldc
579 : #endif
580 :
581 146560 : CALL timeset(routineN, handle)
582 :
583 146560 : a => matrix_a%local_data
584 146560 : b => matrix_b%local_data
585 146560 : c => matrix_c%local_data
586 :
587 : #if defined(__parallel)
588 :
589 1465600 : desca(:) = matrix_a%matrix_struct%descriptor(:)
590 1465600 : descb(:) = matrix_b%matrix_struct%descriptor(:)
591 1465600 : descc(:) = matrix_c%matrix_struct%descriptor(:)
592 :
593 146560 : CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
594 :
595 : #else
596 :
597 : lda = matrix_a%matrix_struct%local_leading_dimension
598 : ldb = matrix_b%matrix_struct%local_leading_dimension
599 : ldc = matrix_c%matrix_struct%local_leading_dimension
600 :
601 : CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
602 :
603 : #endif
604 146560 : CALL timestop(handle)
605 :
606 146560 : END SUBROUTINE cp_fm_symm
607 :
608 : ! **************************************************************************************************
609 : !> \brief computes the Frobenius norm of matrix_a
610 : !> \brief computes the Frobenius norm of matrix_a
611 : !> \param matrix_a : m x n matrix
612 : !> \return ...
613 : !> \author VW
614 : ! **************************************************************************************************
615 7926 : FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
616 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
617 : REAL(KIND=dp) :: norm
618 :
619 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
620 :
621 : INTEGER :: handle, size_a
622 7926 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
623 : REAL(KIND=dp), EXTERNAL :: DDOT
624 : #if defined(__parallel)
625 : TYPE(mp_comm_type) :: group
626 : #endif
627 :
628 7926 : CALL timeset(routineN, handle)
629 :
630 : norm = 0.0_dp
631 7926 : a => matrix_a%local_data
632 7926 : size_a = SIZE(a, 1)*SIZE(a, 2)
633 7926 : norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
634 : #if defined(__parallel)
635 7926 : group = matrix_a%matrix_struct%para_env
636 7926 : CALL group%sum(norm)
637 : #endif
638 7926 : norm = SQRT(norm)
639 :
640 7926 : CALL timestop(handle)
641 :
642 7926 : END FUNCTION cp_fm_frobenius_norm
643 :
644 : ! **************************************************************************************************
645 : !> \brief performs a rank-k update of a symmetric matrix_c
646 : !> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
647 : !> \param uplo : 'U' ('L')
648 : !> \param trans : 'N' ('T')
649 : !> \param k : number of cols to use in matrix_a
650 : !> ia,ja :: 1,1 (could be used for selecting subblock of a)
651 : !> \param alpha ...
652 : !> \param matrix_a ...
653 : !> \param ia ...
654 : !> \param ja ...
655 : !> \param beta ...
656 : !> \param matrix_c ...
657 : !> \author Matthias Krack
658 : ! **************************************************************************************************
659 7370 : SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
660 : CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
661 : INTEGER, INTENT(IN) :: k
662 : REAL(KIND=dp), INTENT(IN) :: alpha
663 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
664 : INTEGER, INTENT(IN) :: ia, ja
665 : REAL(KIND=dp), INTENT(IN) :: beta
666 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
667 :
668 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
669 :
670 : INTEGER :: handle, n
671 7370 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
672 : #if defined(__parallel)
673 : INTEGER, DIMENSION(9) :: desca, descc
674 : #else
675 : INTEGER :: lda, ldc
676 : #endif
677 :
678 7370 : CALL timeset(routineN, handle)
679 :
680 7370 : n = matrix_c%matrix_struct%nrow_global
681 :
682 7370 : a => matrix_a%local_data
683 7370 : c => matrix_c%local_data
684 :
685 : #if defined(__parallel)
686 :
687 73700 : desca(:) = matrix_a%matrix_struct%descriptor(:)
688 73700 : descc(:) = matrix_c%matrix_struct%descriptor(:)
689 :
690 7370 : CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
691 :
692 : #else
693 :
694 : lda = SIZE(a, 1)
695 : ldc = SIZE(c, 1)
696 :
697 : CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
698 :
699 : #endif
700 7370 : CALL timestop(handle)
701 :
702 7370 : END SUBROUTINE cp_fm_syrk
703 :
704 : ! **************************************************************************************************
705 : !> \brief computes the schur product of two matrices
706 : !> c_ij = a_ij * b_ij
707 : !> \param matrix_a ...
708 : !> \param matrix_b ...
709 : !> \param matrix_c ...
710 : !> \author Joost VandeVondele
711 : ! **************************************************************************************************
712 8624 : SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
713 :
714 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
715 :
716 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
717 :
718 : INTEGER :: handle, icol_local, irow_local, mypcol, &
719 : myprow, ncol_local, npcol, nprow, &
720 : nrow_local
721 8624 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
722 : TYPE(cp_blacs_env_type), POINTER :: context
723 :
724 8624 : CALL timeset(routineN, handle)
725 :
726 8624 : context => matrix_a%matrix_struct%context
727 8624 : myprow = context%mepos(1)
728 8624 : mypcol = context%mepos(2)
729 8624 : nprow = context%num_pe(1)
730 8624 : npcol = context%num_pe(2)
731 :
732 8624 : a => matrix_a%local_data
733 8624 : b => matrix_b%local_data
734 8624 : c => matrix_c%local_data
735 :
736 8624 : nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
737 8624 : ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
738 :
739 95822 : DO icol_local = 1, ncol_local
740 6656427 : DO irow_local = 1, nrow_local
741 6647803 : c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
742 : END DO
743 : END DO
744 :
745 8624 : CALL timestop(handle)
746 :
747 8624 : END SUBROUTINE cp_fm_schur_product
748 :
749 : ! **************************************************************************************************
750 : !> \brief returns the trace of matrix_a^T matrix_b, i.e
751 : !> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
752 : !> \param matrix_a a matrix
753 : !> \param matrix_b another matrix
754 : !> \param trace ...
755 : !> \par History
756 : !> 11.06.2001 Creation (Matthias Krack)
757 : !> 12.2002 added doc [fawzi]
758 : !> \author Matthias Krack
759 : !> \note
760 : !> note the transposition of matrix_a!
761 : ! **************************************************************************************************
762 696420 : SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
763 :
764 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
765 : REAL(KIND=dp), INTENT(OUT) :: trace
766 :
767 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
768 :
769 : INTEGER :: handle, mypcol, myprow, ncol_local, &
770 : npcol, nprow, nrow_local
771 696420 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
772 696420 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
773 : TYPE(cp_blacs_env_type), POINTER :: context
774 : TYPE(mp_comm_type) :: group
775 :
776 696420 : CALL timeset(routineN, handle)
777 :
778 696420 : context => matrix_a%matrix_struct%context
779 696420 : myprow = context%mepos(1)
780 696420 : mypcol = context%mepos(2)
781 696420 : nprow = context%num_pe(1)
782 696420 : npcol = context%num_pe(2)
783 :
784 696420 : group = matrix_a%matrix_struct%para_env
785 :
786 696420 : a => matrix_a%local_data
787 696420 : b => matrix_b%local_data
788 :
789 696420 : a_sp => matrix_a%local_data_sp
790 696420 : b_sp => matrix_b%local_data_sp
791 :
792 696420 : nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
793 696420 : ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
794 :
795 : ! cries for an accurate_dot_product
796 696420 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
797 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
798 0 : b_sp(1:nrow_local, 1:ncol_local), dp))
799 696420 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
800 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
801 0 : b(1:nrow_local, 1:ncol_local))
802 696420 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
803 : trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
804 0 : REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
805 : ELSE
806 : trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
807 696420 : b(1:nrow_local, 1:ncol_local))
808 : END IF
809 :
810 696420 : CALL group%sum(trace)
811 :
812 696420 : CALL timestop(handle)
813 :
814 696420 : END SUBROUTINE cp_fm_trace_a0b0t0
815 :
816 : #:mute
817 : #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
818 : #:endmute
819 :
820 : ! **************************************************************************************************
821 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
822 : !> \param matrix_a list of A matrices
823 : !> \param matrix_b B matrix
824 : !> \param trace computed traces
825 : !> \par History
826 : !> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
827 : !> \note \parblock
828 : !> Computing the trace requires collective communication between involved MPI processes
829 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
830 : !> the amount of time wasted in such synchronisation by performing one large collective
831 : !> operation which involves all the matrices in question.
832 : !>
833 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
834 : !> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
835 : !> 'matrix_b' is a single matrix.
836 : !> \endparblock
837 : ! **************************************************************************************************
838 : #:for longname, shortname, appendix in types
839 3042 : SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
840 : TYPE(${longname}$), DIMENSION(:), INTENT(IN) :: matrix_a
841 : TYPE(cp_fm_type), INTENT(IN) :: matrix_b
842 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
843 :
844 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
845 :
846 : INTEGER :: handle, imatrix, n_matrices, &
847 : ncols_local, nrows_local
848 : LOGICAL :: use_sp_a, use_sp_b
849 3042 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
850 3042 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
851 : TYPE(mp_comm_type) :: group
852 :
853 3042 : CALL timeset(routineN, handle)
854 :
855 3042 : n_matrices = SIZE(trace)
856 3042 : CPASSERT(SIZE(matrix_a) == n_matrices)
857 :
858 3042 : CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
859 3042 : use_sp_b = matrix_b%use_sp
860 :
861 3042 : IF (use_sp_b) THEN
862 0 : ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
863 : ELSE
864 3042 : ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
865 : END IF
866 :
867 : !$OMP PARALLEL DO DEFAULT(NONE), &
868 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
869 : !$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
870 3042 : !$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
871 :
872 : DO imatrix = 1, n_matrices
873 :
874 : use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
875 :
876 : ! assume that the matrices A(i) and B have identical shapes and distribution schemes
877 : IF (use_sp_a .AND. use_sp_b) THEN
878 : ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
879 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
880 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
881 : ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
882 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
883 : ELSE
884 : CPABORT("Matrices A and B are of different types")
885 : END IF
886 : END DO
887 : !$OMP END PARALLEL DO
888 :
889 3042 : group = matrix_b%matrix_struct%para_env
890 18966 : CALL group%sum(trace)
891 :
892 3042 : CALL timestop(handle)
893 3042 : END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
894 : #:endfor
895 :
896 : ! **************************************************************************************************
897 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
898 : !> \param matrix_a list of A matrices
899 : !> \param matrix_b list of B matrices
900 : !> \param trace computed traces
901 : !> \param accurate ...
902 : !> \par History
903 : !> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
904 : !> \note \parblock
905 : !> Computing the trace requires collective communication between involved MPI processes
906 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
907 : !> the amount of time wasted in such synchronisation by performing one large collective
908 : !> operation which involves all the matrices in question.
909 : !>
910 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
911 : !> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
912 : !> \endparblock
913 : ! **************************************************************************************************
914 : #:for longname1, shortname1, appendix1 in types
915 : #:for longname2, shortname2, appendix2 in types
916 138878 : SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
917 : TYPE(${longname1}$), DIMENSION(:), INTENT(IN) :: matrix_a
918 : TYPE(${longname2}$), DIMENSION(:), INTENT(IN) :: matrix_b
919 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
920 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
921 :
922 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
923 :
924 : INTEGER :: handle, imatrix, n_matrices, &
925 : ncols_local, nrows_local
926 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
927 138878 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
928 138878 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
929 : TYPE(mp_comm_type) :: group
930 :
931 138878 : CALL timeset(routineN, handle)
932 :
933 138878 : n_matrices = SIZE(trace)
934 138878 : CPASSERT(SIZE(matrix_a) == n_matrices)
935 138878 : CPASSERT(SIZE(matrix_b) == n_matrices)
936 :
937 138878 : use_accurate_sum = .TRUE.
938 138878 : IF (PRESENT(accurate)) use_accurate_sum = accurate
939 :
940 : !$OMP PARALLEL DO DEFAULT(NONE), &
941 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
942 : !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
943 138878 : !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
944 : DO imatrix = 1, n_matrices
945 : CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
946 :
947 : use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
948 : use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
949 :
950 : ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
951 : IF (use_sp_a .AND. use_sp_b) THEN
952 : ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
953 : ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
954 : IF (use_accurate_sum) THEN
955 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
956 : ELSE
957 : trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
958 : END IF
959 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
960 : ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
961 : ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
962 : IF (use_accurate_sum) THEN
963 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
964 : ELSE
965 : trace(imatrix) = SUM(ldata_a*ldata_b)
966 : END IF
967 : ELSE
968 : CPABORT("Matrices A and B are of different types")
969 : END IF
970 : END DO
971 : !$OMP END PARALLEL DO
972 :
973 138878 : group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
974 463638 : CALL group%sum(trace)
975 :
976 138878 : CALL timestop(handle)
977 138878 : END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
978 : #:endfor
979 : #:endfor
980 :
981 : ! **************************************************************************************************
982 : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
983 : !> \param matrix_a list of A matrices
984 : !> \param matrix_b list of B matrices
985 : !> \param trace computed traces
986 : !> \param accurate ...
987 : ! **************************************************************************************************
988 : #:for longname1, shortname1, appendix1 in types
989 : #:for longname2, shortname2, appendix2 in types
990 13252 : SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
991 : TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN) :: matrix_a
992 : TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN) :: matrix_b
993 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
994 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
995 :
996 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
997 :
998 : INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
999 : nrows_local, nz
1000 : INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1001 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1002 : REAL(kind=dp) :: t
1003 13252 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1004 13252 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1005 : TYPE(mp_comm_type) :: group
1006 :
1007 13252 : CALL timeset(routineN, handle)
1008 :
1009 13252 : nz = SIZE(matrix_a, 1)
1010 13252 : CPASSERT(SIZE(matrix_b, 1) == nz)
1011 :
1012 13252 : na = SIZE(matrix_a, 2)
1013 13252 : nb = SIZE(matrix_b, 2)
1014 13252 : CPASSERT(SIZE(trace, 1) == na)
1015 13252 : CPASSERT(SIZE(trace, 2) == nb)
1016 :
1017 13252 : use_accurate_sum = .TRUE.
1018 13252 : IF (PRESENT(accurate)) use_accurate_sum = accurate
1019 :
1020 : ! here we use one running index (itrace) instead of two (ia, ib) in order to
1021 : ! improve load balance between shared-memory threads
1022 13252 : ntraces = na*nb
1023 13252 : na8 = INT(na, kind=int_8)
1024 :
1025 : !$OMP PARALLEL DO DEFAULT(NONE), &
1026 : !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1027 : !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1028 13252 : !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1029 : DO itrace = 1, ntraces
1030 : ib8 = (itrace - 1)/na8
1031 : ia = INT(itrace - ib8*na8)
1032 : ib = INT(ib8) + 1
1033 :
1034 : t = 0.0_dp
1035 : DO iz = 1, nz
1036 : CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
1037 : use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
1038 : use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
1039 :
1040 : ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1041 : IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1042 : ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
1043 : ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
1044 : IF (use_accurate_sum) THEN
1045 : t = t + accurate_dot_product(ldata_a, ldata_b)
1046 : ELSE
1047 : t = t + SUM(ldata_a*ldata_b)
1048 : END IF
1049 : ELSE IF (use_sp_a .AND. use_sp_b) THEN
1050 : ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
1051 : ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
1052 : IF (use_accurate_sum) THEN
1053 : t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1054 : ELSE
1055 : t = t + SUM(ldata_a_sp*ldata_b_sp)
1056 : END IF
1057 : ELSE
1058 : CPABORT("Matrices A and B are of different types")
1059 : END IF
1060 : END DO
1061 : trace(ia, ib) = t
1062 : END DO
1063 : !$OMP END PARALLEL DO
1064 :
1065 13252 : group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
1066 621616 : CALL group%sum(trace)
1067 :
1068 13252 : CALL timestop(handle)
1069 13252 : END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
1070 : #:endfor
1071 : #:endfor
1072 :
1073 : ! **************************************************************************************************
1074 : !> \brief multiplies in place by a triangular matrix:
1075 : !> matrix_b = alpha op(triangular_matrix) matrix_b
1076 : !> or (if side='R')
1077 : !> matrix_b = alpha matrix_b op(triangular_matrix)
1078 : !> op(triangular_matrix) is:
1079 : !> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1080 : !> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1081 : !> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1082 : !> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1083 : !> \param triangular_matrix the triangular matrix that multiplies the other
1084 : !> \param matrix_b the matrix that gets multiplied and stores the result
1085 : !> \param side on which side of matrix_b stays op(triangular_matrix)
1086 : !> (defaults to 'L')
1087 : !> \param transpose_tr if the triangular matrix should be transposed
1088 : !> (defaults to false)
1089 : !> \param invert_tr if the triangular matrix should be inverted
1090 : !> (defaults to false)
1091 : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1092 : !> lower ('L') triangle (defaults to 'U')
1093 : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1094 : !> be assumed to be 1 (defaults to false)
1095 : !> \param n_rows the number of rows of the result (defaults to
1096 : !> size(matrix_b,1))
1097 : !> \param n_cols the number of columns of the result (defaults to
1098 : !> size(matrix_b,2))
1099 : !> \param alpha ...
1100 : !> \par History
1101 : !> 08.2002 created [fawzi]
1102 : !> \author Fawzi Mohamed
1103 : !> \note
1104 : !> needs an mpi env
1105 : ! **************************************************************************************************
1106 107130 : SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1107 : transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1108 : alpha)
1109 : TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1110 : CHARACTER, INTENT(IN), OPTIONAL :: side
1111 : LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1112 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1113 : LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1114 : INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1115 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: alpha
1116 :
1117 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
1118 :
1119 : CHARACTER :: side_char, transa, unit_diag, uplo
1120 : INTEGER :: handle, m, n
1121 : LOGICAL :: invert
1122 : REAL(KIND=dp) :: al
1123 :
1124 53565 : CALL timeset(routineN, handle)
1125 53565 : side_char = 'L'
1126 53565 : unit_diag = 'N'
1127 53565 : uplo = 'U'
1128 53565 : transa = 'N'
1129 53565 : invert = .FALSE.
1130 53565 : al = 1.0_dp
1131 53565 : CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1132 53565 : IF (PRESENT(side)) side_char = side
1133 53565 : IF (PRESENT(invert_tr)) invert = invert_tr
1134 53565 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1135 53565 : IF (PRESENT(unit_diag_tr)) THEN
1136 0 : IF (unit_diag_tr) THEN
1137 0 : unit_diag = 'U'
1138 : ELSE
1139 : unit_diag = 'N'
1140 : END IF
1141 : END IF
1142 53565 : IF (PRESENT(transpose_tr)) THEN
1143 3564 : IF (transpose_tr) THEN
1144 1296 : transa = 'T'
1145 : ELSE
1146 : transa = 'N'
1147 : END IF
1148 : END IF
1149 53565 : IF (PRESENT(alpha)) al = alpha
1150 53565 : IF (PRESENT(n_rows)) m = n_rows
1151 53565 : IF (PRESENT(n_cols)) n = n_cols
1152 :
1153 53565 : IF (invert) THEN
1154 :
1155 : #if defined(__parallel)
1156 : CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1157 : triangular_matrix%local_data(1, 1), 1, 1, &
1158 : triangular_matrix%matrix_struct%descriptor, &
1159 : matrix_b%local_data(1, 1), 1, 1, &
1160 43385 : matrix_b%matrix_struct%descriptor(1))
1161 : #else
1162 : CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1163 : triangular_matrix%local_data(1, 1), &
1164 : SIZE(triangular_matrix%local_data, 1), &
1165 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1166 : #endif
1167 :
1168 : ELSE
1169 :
1170 : #if defined(__parallel)
1171 : CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1172 : triangular_matrix%local_data(1, 1), 1, 1, &
1173 : triangular_matrix%matrix_struct%descriptor, &
1174 : matrix_b%local_data(1, 1), 1, 1, &
1175 10180 : matrix_b%matrix_struct%descriptor(1))
1176 : #else
1177 : CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1178 : triangular_matrix%local_data(1, 1), &
1179 : SIZE(triangular_matrix%local_data, 1), &
1180 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1181 : #endif
1182 :
1183 : END IF
1184 :
1185 53565 : CALL timestop(handle)
1186 53565 : END SUBROUTINE cp_fm_triangular_multiply
1187 :
1188 : ! **************************************************************************************************
1189 : !> \brief scales a matrix
1190 : !> matrix_a = alpha * matrix_b
1191 : !> \param alpha ...
1192 : !> \param matrix_a ...
1193 : !> \note
1194 : !> use cp_fm_set_all to zero (avoids problems with nan)
1195 : ! **************************************************************************************************
1196 80449 : SUBROUTINE cp_fm_scale(alpha, matrix_a)
1197 : REAL(KIND=dp), INTENT(IN) :: alpha
1198 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1199 :
1200 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale'
1201 :
1202 : INTEGER :: handle, size_a
1203 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1204 :
1205 80449 : CALL timeset(routineN, handle)
1206 :
1207 : NULLIFY (a)
1208 :
1209 80449 : a => matrix_a%local_data
1210 80449 : size_a = SIZE(a, 1)*SIZE(a, 2)
1211 :
1212 80449 : CALL DSCAL(size_a, alpha, a, 1)
1213 :
1214 80449 : CALL timestop(handle)
1215 :
1216 80449 : END SUBROUTINE cp_fm_scale
1217 :
1218 : ! **************************************************************************************************
1219 : !> \brief transposes a matrix
1220 : !> matrixt = matrix ^ T
1221 : !> \param matrix ...
1222 : !> \param matrixt ...
1223 : !> \note
1224 : !> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1225 : ! **************************************************************************************************
1226 26482 : SUBROUTINE cp_fm_transpose(matrix, matrixt)
1227 : TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1228 :
1229 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
1230 :
1231 : INTEGER :: handle, ncol_global, &
1232 : nrow_global, ncol_globalt, nrow_globalt
1233 13241 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
1234 : #if defined(__parallel)
1235 : INTEGER, DIMENSION(9) :: desca, descc
1236 : #elif !defined(__MKL)
1237 : INTEGER :: i, j
1238 : #endif
1239 :
1240 13241 : nrow_global = matrix%matrix_struct%nrow_global
1241 13241 : ncol_global = matrix%matrix_struct%ncol_global
1242 13241 : nrow_globalt = matrixt%matrix_struct%nrow_global
1243 13241 : ncol_globalt = matrixt%matrix_struct%ncol_global
1244 0 : CPASSERT(nrow_global == ncol_globalt)
1245 13241 : CPASSERT(nrow_globalt == ncol_global)
1246 :
1247 13241 : CALL timeset(routineN, handle)
1248 :
1249 13241 : a => matrix%local_data
1250 13241 : c => matrixt%local_data
1251 :
1252 : #if defined(__parallel)
1253 132410 : desca(:) = matrix%matrix_struct%descriptor(:)
1254 132410 : descc(:) = matrixt%matrix_struct%descriptor(:)
1255 13241 : CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1256 : #elif defined(__MKL)
1257 : CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1258 : #else
1259 : DO j = 1, ncol_global
1260 : DO i = 1, nrow_global
1261 : c(j, i) = a(i, j)
1262 : END DO
1263 : END DO
1264 : #endif
1265 13241 : CALL timestop(handle)
1266 :
1267 13241 : END SUBROUTINE cp_fm_transpose
1268 :
1269 : ! **************************************************************************************************
1270 : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1271 : !> \param matrix the triangular matrix as input, the full matrix as output
1272 : !> \param work a matrix of the same size as matrix
1273 : !> \param uplo triangular format; defaults to 'U'
1274 : !> \author Matthias Krack
1275 : !> \note
1276 : !> the opposite triangular part is irrelevant
1277 : ! **************************************************************************************************
1278 340040 : SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1279 :
1280 : TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1281 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1282 :
1283 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
1284 :
1285 : CHARACTER :: myuplo
1286 : INTEGER :: handle, icol_global, irow_global, &
1287 : mypcol, myprow, ncol_global, &
1288 : npcol, nprow, nrow_global
1289 170020 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1290 170020 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1291 : TYPE(cp_blacs_env_type), POINTER :: context
1292 :
1293 : #if defined(__parallel)
1294 : INTEGER :: icol_local, irow_local, &
1295 : ncol_block, ncol_local, &
1296 : nrow_block, nrow_local
1297 : INTEGER, DIMENSION(9) :: desca, descc
1298 170020 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: c
1299 170020 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: c_sp
1300 : #endif
1301 :
1302 170020 : myuplo = 'U'
1303 0 : IF (PRESENT(uplo)) myuplo = uplo
1304 :
1305 170020 : nrow_global = matrix%matrix_struct%nrow_global
1306 170020 : ncol_global = matrix%matrix_struct%ncol_global
1307 170020 : CPASSERT(nrow_global == ncol_global)
1308 170020 : nrow_global = work%matrix_struct%nrow_global
1309 170020 : ncol_global = work%matrix_struct%ncol_global
1310 170020 : CPASSERT(nrow_global == ncol_global)
1311 170020 : CPASSERT(matrix%use_sp .EQV. work%use_sp)
1312 :
1313 170020 : CALL timeset(routineN, handle)
1314 :
1315 170020 : context => matrix%matrix_struct%context
1316 170020 : myprow = context%mepos(1)
1317 170020 : mypcol = context%mepos(2)
1318 170020 : nprow = context%num_pe(1)
1319 170020 : npcol = context%num_pe(2)
1320 :
1321 : #if defined(__parallel)
1322 :
1323 170020 : nrow_block = matrix%matrix_struct%nrow_block
1324 170020 : ncol_block = matrix%matrix_struct%ncol_block
1325 :
1326 170020 : nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1327 170020 : ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1328 :
1329 170020 : a => work%local_data
1330 170020 : a_sp => work%local_data_sp
1331 1700200 : desca(:) = work%matrix_struct%descriptor(:)
1332 170020 : c => matrix%local_data
1333 170020 : c_sp => matrix%local_data_sp
1334 1700200 : descc(:) = matrix%matrix_struct%descriptor(:)
1335 :
1336 5887815 : DO icol_local = 1, ncol_local
1337 5717795 : icol_global = matrix%matrix_struct%col_indices(icol_local)
1338 258985702 : DO irow_local = 1, nrow_local
1339 253097887 : irow_global = matrix%matrix_struct%row_indices(irow_local)
1340 258815682 : IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1341 124901514 : IF (matrix%use_sp) THEN
1342 0 : c_sp(irow_local, icol_local) = 0.0_sp
1343 : ELSE
1344 124901514 : c(irow_local, icol_local) = 0.0_dp
1345 : END IF
1346 128196373 : ELSE IF (irow_global == icol_global) THEN
1347 3294859 : IF (matrix%use_sp) THEN
1348 0 : c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1349 : ELSE
1350 3294859 : c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1351 : END IF
1352 : END IF
1353 : END DO
1354 : END DO
1355 :
1356 5887815 : DO icol_local = 1, ncol_local
1357 258985702 : DO irow_local = 1, nrow_local
1358 258815682 : IF (matrix%use_sp) THEN
1359 0 : a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1360 : ELSE
1361 253097887 : a(irow_local, icol_local) = c(irow_local, icol_local)
1362 : END IF
1363 : END DO
1364 : END DO
1365 :
1366 170020 : IF (matrix%use_sp) THEN
1367 0 : CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1368 : ELSE
1369 170020 : CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1370 : END IF
1371 :
1372 : #else
1373 :
1374 : a => matrix%local_data
1375 : a_sp => matrix%local_data_sp
1376 :
1377 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1378 : DO irow_global = 1, nrow_global
1379 : DO icol_global = irow_global + 1, ncol_global
1380 : IF (matrix%use_sp) THEN
1381 : a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1382 : ELSE
1383 : a(icol_global, irow_global) = a(irow_global, icol_global)
1384 : END IF
1385 : END DO
1386 : END DO
1387 : ELSE
1388 : DO icol_global = 1, ncol_global
1389 : DO irow_global = icol_global + 1, nrow_global
1390 : IF (matrix%use_sp) THEN
1391 : a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
1392 : ELSE
1393 : a(irow_global, icol_global) = a(icol_global, irow_global)
1394 : END IF
1395 : END DO
1396 : END DO
1397 : END IF
1398 :
1399 : #endif
1400 170020 : CALL timestop(handle)
1401 :
1402 170020 : END SUBROUTINE cp_fm_uplo_to_full
1403 :
1404 : ! **************************************************************************************************
1405 : !> \brief scales column i of matrix a with scaling(i)
1406 : !> \param matrixa ...
1407 : !> \param scaling : an array used for scaling the columns,
1408 : !> SIZE(scaling) determines the number of columns to be scaled
1409 : !> \author Joost VandeVondele
1410 : !> \note
1411 : !> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1412 : !> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1413 : !> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1414 : !> where every vector has a different prefactor
1415 : ! **************************************************************************************************
1416 125970 : SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1417 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1418 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1419 :
1420 : INTEGER :: k, mypcol, myprow, n, ncol_global, &
1421 : npcol, nprow
1422 125970 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1423 125970 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1424 : #if defined(__parallel)
1425 : INTEGER :: icol_global, icol_local, &
1426 : ipcol, iprow, irow_local
1427 : #else
1428 : INTEGER :: i
1429 : #endif
1430 :
1431 125970 : myprow = matrixa%matrix_struct%context%mepos(1)
1432 125970 : mypcol = matrixa%matrix_struct%context%mepos(2)
1433 125970 : nprow = matrixa%matrix_struct%context%num_pe(1)
1434 125970 : npcol = matrixa%matrix_struct%context%num_pe(2)
1435 :
1436 125970 : ncol_global = matrixa%matrix_struct%ncol_global
1437 :
1438 125970 : a => matrixa%local_data
1439 125970 : a_sp => matrixa%local_data_sp
1440 125970 : IF (matrixa%use_sp) THEN
1441 0 : n = SIZE(a_sp, 1)
1442 : ELSE
1443 125970 : n = SIZE(a, 1)
1444 : END IF
1445 125970 : k = MIN(SIZE(scaling), ncol_global)
1446 :
1447 : #if defined(__parallel)
1448 :
1449 2064025 : DO icol_global = 1, k
1450 : CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1451 : nprow, npcol, myprow, mypcol, &
1452 1938055 : irow_local, icol_local, iprow, ipcol)
1453 2064025 : IF ((ipcol == mypcol)) THEN
1454 1938055 : IF (matrixa%use_sp) THEN
1455 0 : CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1456 : ELSE
1457 1938055 : CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
1458 : END IF
1459 : END IF
1460 : END DO
1461 : #else
1462 : DO i = 1, k
1463 : IF (matrixa%use_sp) THEN
1464 : CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
1465 : ELSE
1466 : CALL DSCAL(n, scaling(i), a(:, i), 1)
1467 : END IF
1468 : END DO
1469 : #endif
1470 125970 : END SUBROUTINE cp_fm_column_scale
1471 :
1472 : ! **************************************************************************************************
1473 : !> \brief scales row i of matrix a with scaling(i)
1474 : !> \param matrixa ...
1475 : !> \param scaling : an array used for scaling the columns,
1476 : !> \author JGH
1477 : !> \note
1478 : ! **************************************************************************************************
1479 6276 : SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1480 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1481 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1482 :
1483 : INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1484 6276 : INTEGER, DIMENSION(:), POINTER :: row_indices
1485 6276 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1486 6276 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1487 : #if defined(__parallel)
1488 : INTEGER :: irow_global, icol, irow
1489 : #else
1490 : INTEGER :: j
1491 : #endif
1492 :
1493 : CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1494 6276 : nrow_local=nrow_local, ncol_local=ncol_local)
1495 6276 : CPASSERT(SIZE(scaling) == nrow_global)
1496 :
1497 6276 : a => matrixa%local_data
1498 6276 : a_sp => matrixa%local_data_sp
1499 6276 : IF (matrixa%use_sp) THEN
1500 6276 : n = SIZE(a_sp, 1)
1501 6276 : m = SIZE(a_sp, 2)
1502 : ELSE
1503 6276 : n = SIZE(a, 1)
1504 6276 : m = SIZE(a, 2)
1505 : END IF
1506 :
1507 : #if defined(__parallel)
1508 78924 : DO icol = 1, ncol_local
1509 78924 : IF (matrixa%use_sp) THEN
1510 0 : DO irow = 1, nrow_local
1511 0 : irow_global = row_indices(irow)
1512 0 : a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
1513 : END DO
1514 : ELSE
1515 6481782 : DO irow = 1, nrow_local
1516 6409134 : irow_global = row_indices(irow)
1517 6481782 : a(irow, icol) = scaling(irow_global)*a(irow, icol)
1518 : END DO
1519 : END IF
1520 : END DO
1521 : #else
1522 : IF (matrixa%use_sp) THEN
1523 : DO j = 1, m
1524 : a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
1525 : END DO
1526 : ELSE
1527 : DO j = 1, m
1528 : a(1:n, j) = scaling(1:n)*a(1:n, j)
1529 : END DO
1530 : END IF
1531 : #endif
1532 6276 : END SUBROUTINE cp_fm_row_scale
1533 :
1534 : ! **************************************************************************************************
1535 : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
1536 : !> \param matrix_a the matrix to invert
1537 : !> \param matrix_inverse the inverse of matrix_a
1538 : !> \param det_a the determinant of matrix_a
1539 : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
1540 : !> are screened
1541 : !> \param eigval optionally return matrix eigenvalues/singular values
1542 : !> \par History
1543 : !> note of Jan Wilhelm (12.2015)
1544 : !> - computation of determinant corrected
1545 : !> - determinant only computed if det_a is present
1546 : !> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
1547 : !> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
1548 : !> \author Florian Schiffmann(02.2007)
1549 : ! **************************************************************************************************
1550 1588 : SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
1551 :
1552 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
1553 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: det_a
1554 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_svd
1555 : REAL(KIND=dp), DIMENSION(:), POINTER, &
1556 : INTENT(INOUT), OPTIONAL :: eigval
1557 :
1558 : INTEGER :: n
1559 1588 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1560 : REAL(KIND=dp) :: determinant, my_eps_svd
1561 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1562 : TYPE(cp_fm_type) :: matrix_lu
1563 :
1564 : #if defined(__parallel)
1565 : TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
1566 : TYPE(mp_comm_type) :: group
1567 : INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
1568 : INTEGER, DIMENSION(9) :: desca
1569 : LOGICAL :: quenched
1570 : REAL(KIND=dp) :: alpha, beta
1571 1588 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
1572 1588 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
1573 : #else
1574 : LOGICAL :: sign
1575 : REAL(KIND=dp) :: eps1
1576 : #endif
1577 :
1578 1588 : my_eps_svd = 0.0_dp
1579 468 : IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
1580 :
1581 : CALL cp_fm_create(matrix=matrix_lu, &
1582 : matrix_struct=matrix_a%matrix_struct, &
1583 1588 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
1584 1588 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
1585 :
1586 1588 : a => matrix_lu%local_data
1587 1588 : n = matrix_lu%matrix_struct%nrow_global
1588 4764 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1589 19369 : ipivot(:) = 0
1590 : #if defined(__parallel)
1591 1588 : IF (my_eps_svd .EQ. 0.0_dp) THEN
1592 : ! Use LU decomposition
1593 1560 : lwork = 3*n
1594 1560 : liwork = 3*n
1595 15600 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1596 1560 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1597 :
1598 1560 : IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
1599 :
1600 1116 : ALLOCATE (diag(n))
1601 916 : diag(:) = 0.0_dp
1602 372 : CALL cp_fm_get_diag(matrix_lu, diag)
1603 :
1604 372 : exponent_of_minus_one = 0
1605 372 : determinant = 1.0_dp
1606 916 : DO i = 1, n
1607 544 : determinant = determinant*diag(i)
1608 916 : IF (ipivot(i) .NE. i) THEN
1609 224 : exponent_of_minus_one = exponent_of_minus_one + 1
1610 : END IF
1611 : END DO
1612 372 : IF (PRESENT(eigval)) THEN
1613 0 : CPASSERT(.NOT. ASSOCIATED(eigval))
1614 0 : ALLOCATE (eigval(n))
1615 0 : eigval(:) = diag
1616 : END IF
1617 372 : DEALLOCATE (diag)
1618 :
1619 372 : group = matrix_lu%matrix_struct%para_env
1620 372 : CALL group%sum(exponent_of_minus_one)
1621 :
1622 372 : determinant = determinant*(-1.0_dp)**exponent_of_minus_one
1623 :
1624 : END IF
1625 :
1626 1560 : alpha = 0.0_dp
1627 1560 : beta = 1.0_dp
1628 1560 : CALL cp_fm_set_all(matrix_inverse, alpha, beta)
1629 1560 : CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
1630 : ELSE
1631 : ! Use singular value decomposition
1632 : CALL cp_fm_create(matrix=u, &
1633 : matrix_struct=matrix_a%matrix_struct, &
1634 28 : name="LEFT_SINGULAR_MATRIX")
1635 28 : CALL cp_fm_set_all(u, alpha=0.0_dp)
1636 : CALL cp_fm_create(matrix=vt, &
1637 : matrix_struct=matrix_a%matrix_struct, &
1638 28 : name="RIGHT_SINGULAR_MATRIX")
1639 28 : CALL cp_fm_set_all(vt, alpha=0.0_dp)
1640 84 : ALLOCATE (diag(n))
1641 92 : diag(:) = 0.0_dp
1642 280 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1643 28 : ALLOCATE (work(1))
1644 : ! Workspace query
1645 28 : lwork = -1
1646 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1647 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1648 28 : lwork = INT(work(1))
1649 28 : DEALLOCATE (work)
1650 84 : ALLOCATE (work(lwork))
1651 : ! SVD
1652 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1653 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1654 : ! info == n+1 implies homogeneity error when the number of procs is large
1655 : ! this likely isnt a problem, but maybe we should handle it separately
1656 28 : IF (info /= 0 .AND. info /= n + 1) &
1657 0 : CPABORT("Singular value decomposition of matrix failed.")
1658 : ! (Pseudo)inverse and (pseudo)determinant
1659 : CALL cp_fm_create(matrix=sigma, &
1660 : matrix_struct=matrix_a%matrix_struct, &
1661 28 : name="SINGULAR_VALUE_MATRIX")
1662 28 : CALL cp_fm_set_all(sigma, alpha=0.0_dp)
1663 28 : determinant = 1.0_dp
1664 28 : quenched = .FALSE.
1665 28 : IF (PRESENT(eigval)) THEN
1666 28 : CPASSERT(.NOT. ASSOCIATED(eigval))
1667 84 : ALLOCATE (eigval(n))
1668 156 : eigval(:) = diag
1669 : END IF
1670 92 : DO i = 1, n
1671 64 : IF (diag(i) < my_eps_svd) THEN
1672 18 : diag(i) = 0.0_dp
1673 18 : quenched = .TRUE.
1674 : ELSE
1675 46 : determinant = determinant*diag(i)
1676 46 : diag(i) = 1.0_dp/diag(i)
1677 : END IF
1678 92 : CALL cp_fm_set_element(sigma, i, i, diag(i))
1679 : END DO
1680 28 : DEALLOCATE (diag)
1681 28 : IF (quenched) &
1682 : CALL cp_warn(__LOCATION__, &
1683 : "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
1684 12 : ". At least one singular value has been quenched.")
1685 : ! Sigma^-1 * U^T
1686 : CALL cp_fm_create(matrix=inv_sigma_ut, &
1687 : matrix_struct=matrix_a%matrix_struct, &
1688 28 : name="SINGULAR_VALUE_MATRIX")
1689 28 : CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
1690 : CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
1691 28 : u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
1692 : ! A^-1 = V * (Sigma^-1 * U^T)
1693 28 : CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
1694 : CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
1695 28 : inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
1696 : ! Clean up
1697 28 : DEALLOCATE (work)
1698 28 : CALL cp_fm_release(u)
1699 28 : CALL cp_fm_release(vt)
1700 28 : CALL cp_fm_release(sigma)
1701 140 : CALL cp_fm_release(inv_sigma_ut)
1702 : END IF
1703 : #else
1704 : IF (my_eps_svd .EQ. 0.0_dp) THEN
1705 : sign = .TRUE.
1706 : CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
1707 : eval_error=eps1)
1708 : CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
1709 : IF (PRESENT(eigval)) &
1710 : CALL cp_abort(__LOCATION__, &
1711 : "NYI. Eigenvalues not available for return without SCALAPACK.")
1712 : ELSE
1713 : CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
1714 : determinant, eigval)
1715 : END IF
1716 : #endif
1717 1588 : CALL cp_fm_release(matrix_lu)
1718 1588 : DEALLOCATE (ipivot)
1719 1588 : IF (PRESENT(det_a)) det_a = determinant
1720 1588 : END SUBROUTINE cp_fm_invert
1721 :
1722 : ! **************************************************************************************************
1723 : !> \brief inverts a triangular matrix
1724 : !> \param matrix_a ...
1725 : !> \param uplo_tr triangular format; defaults to 'U'
1726 : !> \author MI
1727 : ! **************************************************************************************************
1728 5060 : SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
1729 :
1730 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1731 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1732 :
1733 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
1734 :
1735 : CHARACTER :: unit_diag, uplo
1736 : INTEGER :: handle, info, ncol_global
1737 5060 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1738 : #if defined(__parallel)
1739 : INTEGER, DIMENSION(9) :: desca
1740 : #endif
1741 :
1742 5060 : CALL timeset(routineN, handle)
1743 :
1744 5060 : unit_diag = 'N'
1745 5060 : uplo = 'U'
1746 5060 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1747 :
1748 5060 : ncol_global = matrix_a%matrix_struct%ncol_global
1749 :
1750 5060 : a => matrix_a%local_data
1751 :
1752 : #if defined(__parallel)
1753 50600 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1754 :
1755 5060 : CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
1756 :
1757 : #else
1758 : CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
1759 : #endif
1760 :
1761 5060 : CALL timestop(handle)
1762 5060 : END SUBROUTINE cp_fm_triangular_invert
1763 :
1764 : ! **************************************************************************************************
1765 : !> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
1766 : !> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
1767 : !> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
1768 : !> \param matrix_a ...
1769 : !> \param matrix_r ...
1770 : !> \param nrow_fact ...
1771 : !> \param ncol_fact ...
1772 : !> \param first_row ...
1773 : !> \param first_col ...
1774 : !> \author MI
1775 : ! **************************************************************************************************
1776 19320 : SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
1777 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
1778 : INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
1779 : first_row, first_col
1780 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1781 :
1782 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
1783 :
1784 : CHARACTER :: myuplo
1785 : INTEGER :: handle, i, icol, info, irow, &
1786 : j, lda, lwork, ncol, &
1787 : ndim, nrow
1788 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
1789 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
1790 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1791 : #if defined(__parallel)
1792 : INTEGER, DIMENSION(9) :: desca
1793 : #endif
1794 :
1795 19320 : CALL timeset(routineN, handle)
1796 :
1797 19320 : myuplo = 'U'
1798 19320 : IF (PRESENT(uplo)) myuplo = uplo
1799 :
1800 19320 : ncol = matrix_a%matrix_struct%ncol_global
1801 19320 : nrow = matrix_a%matrix_struct%nrow_global
1802 19320 : lda = nrow
1803 :
1804 19320 : a => matrix_a%local_data
1805 :
1806 19320 : IF (PRESENT(nrow_fact)) nrow = nrow_fact
1807 19320 : IF (PRESENT(ncol_fact)) ncol = ncol_fact
1808 19320 : irow = 1
1809 19320 : IF (PRESENT(first_row)) irow = first_row
1810 19320 : icol = 1
1811 19320 : IF (PRESENT(first_col)) icol = first_col
1812 :
1813 19320 : CPASSERT(nrow >= ncol)
1814 19320 : ndim = SIZE(a, 2)
1815 57960 : ALLOCATE (tau(ndim))
1816 :
1817 : #if defined(__parallel)
1818 :
1819 193200 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1820 :
1821 19320 : lwork = -1
1822 57960 : ALLOCATE (work(2*ndim))
1823 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1824 19320 : lwork = INT(work(1))
1825 19320 : DEALLOCATE (work)
1826 57960 : ALLOCATE (work(lwork))
1827 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1828 :
1829 : #else
1830 : lwork = -1
1831 : ALLOCATE (work(2*ndim))
1832 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1833 : lwork = INT(work(1))
1834 : DEALLOCATE (work)
1835 : ALLOCATE (work(lwork))
1836 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1837 :
1838 : #endif
1839 :
1840 77280 : ALLOCATE (r_mat(ncol, ncol))
1841 19320 : CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
1842 19320 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1843 38640 : DO i = 1, ncol
1844 38640 : DO j = i + 1, ncol
1845 19320 : r_mat(j, i) = 0.0_dp
1846 : END DO
1847 : END DO
1848 : ELSE
1849 0 : DO j = 1, ncol
1850 0 : DO i = j + 1, ncol
1851 0 : r_mat(i, j) = 0.0_dp
1852 : END DO
1853 : END DO
1854 : END IF
1855 19320 : CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
1856 :
1857 19320 : DEALLOCATE (tau, work, r_mat)
1858 :
1859 19320 : CALL timestop(handle)
1860 :
1861 19320 : END SUBROUTINE cp_fm_qr_factorization
1862 :
1863 : ! **************************************************************************************************
1864 : !> \brief computes the the solution to A*b=A_general using lu decomposition
1865 : !> \param matrix_a input matrix; will be overwritten
1866 : !> \param general_a contains the result
1867 : !> \author Florian Schiffmann
1868 : ! **************************************************************************************************
1869 6822 : SUBROUTINE cp_fm_solve(matrix_a, general_a)
1870 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
1871 :
1872 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
1873 :
1874 : INTEGER :: handle, info, n, nrhs
1875 6822 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1876 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, a_general
1877 : #if defined(__parallel)
1878 : INTEGER, DIMENSION(9) :: desca, descb
1879 : #else
1880 : INTEGER :: lda, ldb
1881 : #endif
1882 :
1883 6822 : CALL timeset(routineN, handle)
1884 :
1885 6822 : a => matrix_a%local_data
1886 6822 : a_general => general_a%local_data
1887 6822 : n = matrix_a%matrix_struct%nrow_global
1888 6822 : nrhs = general_a%matrix_struct%ncol_global
1889 20466 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1890 :
1891 : #if defined(__parallel)
1892 68220 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1893 68220 : descb(:) = general_a%matrix_struct%descriptor(:)
1894 6822 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1895 : CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
1896 6822 : 1, 1, descb, info)
1897 :
1898 : #else
1899 : lda = SIZE(a, 1)
1900 : ldb = SIZE(a_general, 1)
1901 : CALL dgetrf(n, n, a, lda, ipivot, info)
1902 : CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
1903 :
1904 : #endif
1905 : ! info is allowed to be zero
1906 : ! this does just signal a zero diagonal element
1907 6822 : DEALLOCATE (ipivot)
1908 6822 : CALL timestop(handle)
1909 6822 : END SUBROUTINE
1910 :
1911 : ! **************************************************************************************************
1912 : !> \brief Convenience function. Computes the matrix multiplications needed
1913 : !> for the multiplication of complex matrices.
1914 : !> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
1915 : !> \param transa : 'N' -> normal 'T' -> transpose
1916 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
1917 : !> \param transb ...
1918 : !> \param m ...
1919 : !> \param n ...
1920 : !> \param k ...
1921 : !> \param alpha ...
1922 : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
1923 : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
1924 : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
1925 : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
1926 : !> \param beta ...
1927 : !> \param C_re m x n matrix, real part
1928 : !> \param C_im m x n matrix, imaginary part
1929 : !> \param a_first_col ...
1930 : !> \param a_first_row ...
1931 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
1932 : !> \param b_first_row ...
1933 : !> \param c_first_col ...
1934 : !> \param c_first_row ...
1935 : !> \author Samuel Andermatt
1936 : !> \note
1937 : !> C should have no overlap with A, B
1938 : ! **************************************************************************************************
1939 0 : SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
1940 : C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
1941 : c_first_row)
1942 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
1943 : INTEGER, INTENT(IN) :: m, n, k
1944 : REAL(KIND=dp), INTENT(IN) :: alpha
1945 : TYPE(cp_fm_type), INTENT(IN) :: A_re, A_im, B_re, B_im
1946 : REAL(KIND=dp), INTENT(IN) :: beta
1947 : TYPE(cp_fm_type), INTENT(IN) :: C_re, C_im
1948 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
1949 : b_first_row, c_first_col, c_first_row
1950 :
1951 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
1952 :
1953 : INTEGER :: handle
1954 :
1955 0 : CALL timeset(routineN, handle)
1956 :
1957 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
1958 : a_first_col=a_first_col, &
1959 : a_first_row=a_first_row, &
1960 : b_first_col=b_first_col, &
1961 : b_first_row=b_first_row, &
1962 : c_first_col=c_first_col, &
1963 0 : c_first_row=c_first_row)
1964 : CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
1965 : a_first_col=a_first_col, &
1966 : a_first_row=a_first_row, &
1967 : b_first_col=b_first_col, &
1968 : b_first_row=b_first_row, &
1969 : c_first_col=c_first_col, &
1970 0 : c_first_row=c_first_row)
1971 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
1972 : a_first_col=a_first_col, &
1973 : a_first_row=a_first_row, &
1974 : b_first_col=b_first_col, &
1975 : b_first_row=b_first_row, &
1976 : c_first_col=c_first_col, &
1977 0 : c_first_row=c_first_row)
1978 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
1979 : a_first_col=a_first_col, &
1980 : a_first_row=a_first_row, &
1981 : b_first_col=b_first_col, &
1982 : b_first_row=b_first_row, &
1983 : c_first_col=c_first_col, &
1984 0 : c_first_row=c_first_row)
1985 :
1986 0 : CALL timestop(handle)
1987 :
1988 0 : END SUBROUTINE cp_complex_fm_gemm
1989 :
1990 : ! **************************************************************************************************
1991 : !> \brief inverts a matrix using LU decomposition
1992 : !> the input matrix will be overwritten
1993 : !> \param matrix : input a general square non-singular matrix, outputs its inverse
1994 : !> \param info_out : optional, if present outputs the info from (p)zgetri
1995 : !> \author Lianheng Tong
1996 : ! **************************************************************************************************
1997 0 : SUBROUTINE cp_fm_lu_invert(matrix, info_out)
1998 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1999 : INTEGER, INTENT(OUT), OPTIONAL :: info_out
2000 :
2001 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
2002 :
2003 : INTEGER :: nrows_global, handle, info, lwork
2004 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2005 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat
2006 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: mat_sp
2007 0 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2008 0 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2009 : #if defined(__parallel)
2010 : INTEGER :: liwork
2011 : INTEGER, DIMENSION(9) :: desca
2012 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2013 : #else
2014 : INTEGER :: lda
2015 : #endif
2016 :
2017 0 : CALL timeset(routineN, handle)
2018 :
2019 0 : mat => matrix%local_data
2020 0 : mat_sp => matrix%local_data_sp
2021 0 : nrows_global = matrix%matrix_struct%nrow_global
2022 0 : CPASSERT(nrows_global .EQ. matrix%matrix_struct%ncol_global)
2023 0 : ALLOCATE (ipivot(nrows_global))
2024 : ! do LU decomposition
2025 : #if defined(__parallel)
2026 0 : desca = matrix%matrix_struct%descriptor
2027 0 : IF (matrix%use_sp) THEN
2028 : CALL psgetrf(nrows_global, nrows_global, &
2029 0 : mat_sp, 1, 1, desca, ipivot, info)
2030 : ELSE
2031 : CALL pdgetrf(nrows_global, nrows_global, &
2032 0 : mat, 1, 1, desca, ipivot, info)
2033 : END IF
2034 : #else
2035 : lda = SIZE(mat, 1)
2036 : IF (matrix%use_sp) THEN
2037 : CALL sgetrf(nrows_global, nrows_global, &
2038 : mat_sp, lda, ipivot, info)
2039 : ELSE
2040 : CALL dgetrf(nrows_global, nrows_global, &
2041 : mat, lda, ipivot, info)
2042 : END IF
2043 : #endif
2044 0 : IF (info /= 0) THEN
2045 0 : CALL cp_abort(__LOCATION__, "LU decomposition has failed")
2046 : END IF
2047 : ! do inversion
2048 0 : IF (matrix%use_sp) THEN
2049 0 : ALLOCATE (work(1))
2050 : ELSE
2051 0 : ALLOCATE (work_sp(1))
2052 : END IF
2053 : #if defined(__parallel)
2054 0 : ALLOCATE (iwork(1))
2055 0 : IF (matrix%use_sp) THEN
2056 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2057 0 : ipivot, work_sp, -1, iwork, -1, info)
2058 0 : lwork = INT(work_sp(1))
2059 0 : DEALLOCATE (work_sp)
2060 0 : ALLOCATE (work_sp(lwork))
2061 : ELSE
2062 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2063 0 : ipivot, work, -1, iwork, -1, info)
2064 0 : lwork = INT(work(1))
2065 0 : DEALLOCATE (work)
2066 0 : ALLOCATE (work(lwork))
2067 : END IF
2068 0 : liwork = INT(iwork(1))
2069 0 : DEALLOCATE (iwork)
2070 0 : ALLOCATE (iwork(liwork))
2071 0 : IF (matrix%use_sp) THEN
2072 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2073 0 : ipivot, work_sp, lwork, iwork, liwork, info)
2074 : ELSE
2075 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2076 0 : ipivot, work, lwork, iwork, liwork, info)
2077 : END IF
2078 0 : DEALLOCATE (iwork)
2079 : #else
2080 : IF (matrix%use_sp) THEN
2081 : CALL sgetri(nrows_global, mat_sp, lda, &
2082 : ipivot, work_sp, -1, info)
2083 : lwork = INT(work_sp(1))
2084 : DEALLOCATE (work_sp)
2085 : ALLOCATE (work_sp(lwork))
2086 : CALL sgetri(nrows_global, mat_sp, lda, &
2087 : ipivot, work_sp, lwork, info)
2088 : ELSE
2089 : CALL dgetri(nrows_global, mat, lda, &
2090 : ipivot, work, -1, info)
2091 : lwork = INT(work(1))
2092 : DEALLOCATE (work)
2093 : ALLOCATE (work(lwork))
2094 : CALL dgetri(nrows_global, mat, lda, &
2095 : ipivot, work, lwork, info)
2096 : END IF
2097 : #endif
2098 0 : IF (matrix%use_sp) THEN
2099 0 : DEALLOCATE (work_sp)
2100 : ELSE
2101 0 : DEALLOCATE (work)
2102 : END IF
2103 0 : DEALLOCATE (ipivot)
2104 :
2105 0 : IF (PRESENT(info_out)) THEN
2106 0 : info_out = info
2107 : ELSE
2108 0 : IF (info /= 0) &
2109 0 : CALL cp_abort(__LOCATION__, "LU inversion has failed")
2110 : END IF
2111 :
2112 0 : CALL timestop(handle)
2113 :
2114 0 : END SUBROUTINE cp_fm_lu_invert
2115 :
2116 : ! **************************************************************************************************
2117 : !> \brief norm of matrix using (p)dlange
2118 : !> \param matrix : input a general matrix
2119 : !> \param mode : 'M' max abs element value,
2120 : !> '1' or 'O' one norm, i.e. maximum column sum
2121 : !> 'I' infinity norm, i.e. maximum row sum
2122 : !> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2123 : !> \return : the norm according to mode
2124 : !> \author Lianheng Tong
2125 : ! **************************************************************************************************
2126 492 : FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2127 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2128 : CHARACTER, INTENT(IN) :: mode
2129 : REAL(KIND=dp) :: res
2130 :
2131 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
2132 :
2133 : INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2134 : REAL(KIND=sp) :: res_sp
2135 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2136 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2137 492 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2138 492 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2139 : #if defined(__parallel)
2140 : INTEGER, DIMENSION(9) :: desca
2141 : #else
2142 : INTEGER :: lda
2143 : #endif
2144 :
2145 492 : CALL timeset(routineN, handle)
2146 :
2147 : CALL cp_fm_get_info(matrix=matrix, &
2148 : nrow_global=nrows, &
2149 : ncol_global=ncols, &
2150 : nrow_local=nrows_local, &
2151 492 : ncol_local=ncols_local)
2152 492 : aa => matrix%local_data
2153 492 : aa_sp => matrix%local_data_sp
2154 :
2155 : #if defined(__parallel)
2156 4920 : desca = matrix%matrix_struct%descriptor
2157 : SELECT CASE (mode)
2158 : CASE ('M', 'm')
2159 492 : lwork = 1
2160 : CASE ('1', 'O', 'o')
2161 492 : lwork = ncols_local
2162 : CASE ('I', 'i')
2163 0 : lwork = nrows_local
2164 : CASE ('F', 'f', 'E', 'e')
2165 0 : lwork = 1
2166 : CASE DEFAULT
2167 492 : CPABORT("mode input is not valid")
2168 : END SELECT
2169 492 : IF (matrix%use_sp) THEN
2170 0 : ALLOCATE (work_sp(lwork))
2171 0 : res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2172 0 : DEALLOCATE (work_sp)
2173 0 : res = REAL(res_sp, KIND=dp)
2174 : ELSE
2175 1476 : ALLOCATE (work(lwork))
2176 492 : res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2177 492 : DEALLOCATE (work)
2178 : END IF
2179 : #else
2180 : SELECT CASE (mode)
2181 : CASE ('M', 'm')
2182 : lwork = 1
2183 : CASE ('1', 'O', 'o')
2184 : lwork = 1
2185 : CASE ('I', 'i')
2186 : lwork = nrows
2187 : CASE ('F', 'f', 'E', 'e')
2188 : lwork = 1
2189 : CASE DEFAULT
2190 : CPABORT("mode input is not valid")
2191 : END SELECT
2192 : IF (matrix%use_sp) THEN
2193 : ALLOCATE (work_sp(lwork))
2194 : lda = SIZE(aa_sp, 1)
2195 : res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2196 : DEALLOCATE (work_sp)
2197 : res = REAL(res_sp, KIND=dp)
2198 : ELSE
2199 : ALLOCATE (work(lwork))
2200 : lda = SIZE(aa, 1)
2201 : res = dlange(mode, nrows, ncols, aa, lda, work)
2202 : DEALLOCATE (work)
2203 : END IF
2204 : #endif
2205 :
2206 492 : CALL timestop(handle)
2207 :
2208 492 : END FUNCTION cp_fm_norm
2209 :
2210 : ! **************************************************************************************************
2211 : !> \brief trace of a matrix using pdlatra
2212 : !> \param matrix : input a square matrix
2213 : !> \return : the trace
2214 : !> \author Lianheng Tong
2215 : ! **************************************************************************************************
2216 0 : FUNCTION cp_fm_latra(matrix) RESULT(res)
2217 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2218 : REAL(KIND=dp) :: res
2219 :
2220 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
2221 :
2222 : INTEGER :: nrows, ncols, handle
2223 : REAL(KIND=sp) :: res_sp
2224 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2225 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2226 : #if defined(__parallel)
2227 : INTEGER, DIMENSION(9) :: desca
2228 : #else
2229 : INTEGER :: ii
2230 : #endif
2231 :
2232 0 : CALL timeset(routineN, handle)
2233 :
2234 0 : nrows = matrix%matrix_struct%nrow_global
2235 0 : ncols = matrix%matrix_struct%ncol_global
2236 0 : CPASSERT(nrows .EQ. ncols)
2237 0 : aa => matrix%local_data
2238 0 : aa_sp => matrix%local_data_sp
2239 :
2240 : #if defined(__parallel)
2241 0 : desca = matrix%matrix_struct%descriptor
2242 0 : IF (matrix%use_sp) THEN
2243 0 : res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2244 0 : res = REAL(res_sp, KIND=dp)
2245 : ELSE
2246 0 : res = pdlatra(nrows, aa, 1, 1, desca)
2247 : END IF
2248 : #else
2249 : IF (matrix%use_sp) THEN
2250 : res_sp = 0.0_sp
2251 : DO ii = 1, nrows
2252 : res_sp = res_sp + aa_sp(ii, ii)
2253 : END DO
2254 : res = REAL(res_sp, KIND=dp)
2255 : ELSE
2256 : res = 0.0_dp
2257 : DO ii = 1, nrows
2258 : res = res + aa(ii, ii)
2259 : END DO
2260 : END IF
2261 : #endif
2262 :
2263 0 : CALL timestop(handle)
2264 :
2265 0 : END FUNCTION cp_fm_latra
2266 :
2267 : ! **************************************************************************************************
2268 : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2269 : !> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2270 : !> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2271 : !> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2272 : !> \param nrow ...
2273 : !> \param ncol ...
2274 : !> \param first_row ...
2275 : !> \param first_col ...
2276 : !> \author MI
2277 : ! **************************************************************************************************
2278 36 : SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2279 :
2280 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2281 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2282 : INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2283 :
2284 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
2285 :
2286 : INTEGER :: handle
2287 : INTEGER :: info, lwork
2288 36 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2289 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2290 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2291 : #if defined(__parallel)
2292 : INTEGER, DIMENSION(9) :: descc
2293 : #else
2294 : INTEGER :: lda
2295 : #endif
2296 :
2297 36 : CALL timeset(routineN, handle)
2298 :
2299 36 : a => matrix%local_data
2300 36 : lwork = -1
2301 108 : ALLOCATE (work(2*nrow))
2302 108 : ALLOCATE (ipiv(ncol))
2303 36 : info = 0
2304 :
2305 : #if defined(__parallel)
2306 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2307 : ! Call SCALAPACK routine to get optimal work dimension
2308 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2309 36 : lwork = INT(work(1))
2310 36 : DEALLOCATE (work)
2311 108 : ALLOCATE (work(lwork))
2312 244 : tau = 0.0_dp
2313 354 : ipiv = 0
2314 :
2315 : ! Call SCALAPACK routine to get QR decomposition of CTs
2316 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2317 : #else
2318 : CPASSERT(first_row == 1 .AND. first_col == 1)
2319 : lda = SIZE(a, 1)
2320 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2321 : lwork = INT(work(1))
2322 : DEALLOCATE (work)
2323 : ALLOCATE (work(lwork))
2324 : tau = 0.0_dp
2325 : ipiv = 0
2326 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2327 : #endif
2328 36 : CPASSERT(info == 0)
2329 :
2330 36 : DEALLOCATE (work)
2331 36 : DEALLOCATE (ipiv)
2332 :
2333 36 : CALL timestop(handle)
2334 :
2335 36 : END SUBROUTINE cp_fm_pdgeqpf
2336 :
2337 : ! **************************************************************************************************
2338 : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2339 : !> with orthonormal columns, which is defined as the first N columns of a product of K
2340 : !> elementary reflectors of order M
2341 : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2342 : !> as returned from PDGEQRF
2343 : !> On exit it contains the M-by-N distributed matrix Q
2344 : !> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2345 : !> \param nrow ...
2346 : !> \param first_row ...
2347 : !> \param first_col ...
2348 : !> \author MI
2349 : ! **************************************************************************************************
2350 36 : SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2351 :
2352 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2353 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2354 : INTEGER, INTENT(IN) :: nrow, first_row, first_col
2355 :
2356 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
2357 :
2358 : INTEGER :: handle
2359 : INTEGER :: info, lwork
2360 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2361 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2362 : #if defined(__parallel)
2363 : INTEGER, DIMENSION(9) :: descc
2364 : #else
2365 : INTEGER :: lda
2366 : #endif
2367 :
2368 36 : CALL timeset(routineN, handle)
2369 :
2370 36 : a => matrix%local_data
2371 36 : lwork = -1
2372 108 : ALLOCATE (work(2*nrow))
2373 36 : info = 0
2374 :
2375 : #if defined(__parallel)
2376 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2377 :
2378 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2379 36 : CPASSERT(info == 0)
2380 36 : lwork = INT(work(1))
2381 36 : DEALLOCATE (work)
2382 108 : ALLOCATE (work(lwork))
2383 :
2384 : ! Call SCALAPACK routine to get Q
2385 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2386 : #else
2387 : CPASSERT(first_row == 1 .AND. first_col == 1)
2388 : lda = SIZE(a, 1)
2389 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2390 : lwork = INT(work(1))
2391 : DEALLOCATE (work)
2392 : ALLOCATE (work(lwork))
2393 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2394 : #endif
2395 36 : CPASSERT(INFO == 0)
2396 :
2397 36 : DEALLOCATE (work)
2398 36 : CALL timestop(handle)
2399 :
2400 36 : END SUBROUTINE cp_fm_pdorgqr
2401 :
2402 : ! **************************************************************************************************
2403 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2404 : !> \param cs cosine of the rotation angle
2405 : !> \param sn sinus of the rotation angle
2406 : !> \param irow ...
2407 : !> \param jrow ...
2408 : !> \author Ole Schuett
2409 : ! **************************************************************************************************
2410 543328 : SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2411 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2412 : INTEGER, INTENT(IN) :: irow, jrow
2413 : REAL(dp), INTENT(IN) :: cs, sn
2414 :
2415 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
2416 : INTEGER :: handle, nrow, ncol
2417 :
2418 : #if defined(__parallel)
2419 : INTEGER :: info, lwork
2420 : INTEGER, DIMENSION(9) :: desc
2421 543328 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2422 : #endif
2423 :
2424 543328 : CALL timeset(routineN, handle)
2425 543328 : CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2426 :
2427 : #if defined(__parallel)
2428 543328 : lwork = 2*ncol + 1
2429 1629984 : ALLOCATE (work(lwork))
2430 5433280 : desc(:) = matrix%matrix_struct%descriptor(:)
2431 : CALL pdrot(ncol, &
2432 : matrix%local_data(1, 1), irow, 1, desc, ncol, &
2433 : matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2434 543328 : cs, sn, work, lwork, info)
2435 543328 : CPASSERT(info == 0)
2436 543328 : DEALLOCATE (work)
2437 : #else
2438 : CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2439 : #endif
2440 :
2441 543328 : CALL timestop(handle)
2442 543328 : END SUBROUTINE cp_fm_rot_rows
2443 :
2444 : ! **************************************************************************************************
2445 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2446 : !> \param cs cosine of the rotation angle
2447 : !> \param sn sinus of the rotation angle
2448 : !> \param icol ...
2449 : !> \param jcol ...
2450 : !> \author Ole Schuett
2451 : ! **************************************************************************************************
2452 612158 : SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2453 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2454 : INTEGER, INTENT(IN) :: icol, jcol
2455 : REAL(dp), INTENT(IN) :: cs, sn
2456 :
2457 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
2458 : INTEGER :: handle, nrow, ncol
2459 :
2460 : #if defined(__parallel)
2461 : INTEGER :: info, lwork
2462 : INTEGER, DIMENSION(9) :: desc
2463 612158 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2464 : #endif
2465 :
2466 612158 : CALL timeset(routineN, handle)
2467 612158 : CALL cp_fm_get_info(matrix, nrow_global=nrow, ncol_global=ncol)
2468 :
2469 : #if defined(__parallel)
2470 612158 : lwork = 2*nrow + 1
2471 1836474 : ALLOCATE (work(lwork))
2472 6121580 : desc(:) = matrix%matrix_struct%descriptor(:)
2473 : CALL pdrot(nrow, &
2474 : matrix%local_data(1, 1), 1, icol, desc, 1, &
2475 : matrix%local_data(1, 1), 1, jcol, desc, 1, &
2476 612158 : cs, sn, work, lwork, info)
2477 612158 : CPASSERT(info == 0)
2478 612158 : DEALLOCATE (work)
2479 : #else
2480 : CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2481 : #endif
2482 :
2483 612158 : CALL timestop(handle)
2484 612158 : END SUBROUTINE cp_fm_rot_cols
2485 :
2486 : ! **************************************************************************************************
2487 : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2488 : !> \param matrix_a ...
2489 : !> \param B ...
2490 : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2491 : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2492 : !> \param start_row starting index of rows, optional, defaults to 1
2493 : !> \param start_col starting index of columns, optional, defaults to 1
2494 : !> \param do_norm ...
2495 : !> \param do_print ...
2496 : ! **************************************************************************************************
2497 0 : SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2498 : do_norm, do_print)
2499 :
2500 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2501 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: B
2502 : INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2503 : LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2504 :
2505 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
2506 :
2507 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2508 : j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2509 : start_col_local, start_row_global, start_row_local, this_col, unit_nr
2510 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2511 : LOGICAL :: my_do_norm, my_do_print
2512 : REAL(KIND=dp) :: norm
2513 0 : REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2514 :
2515 0 : CALL timeset(routineN, handle)
2516 :
2517 0 : my_do_norm = .TRUE.
2518 0 : IF (PRESENT(do_norm)) my_do_norm = do_norm
2519 :
2520 0 : my_do_print = .FALSE.
2521 0 : IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2522 :
2523 0 : unit_nr = -1
2524 0 : IF (my_do_print) THEN
2525 0 : unit_nr = cp_logger_get_default_unit_nr()
2526 0 : IF (unit_nr < 1) my_do_print = .FALSE.
2527 : END IF
2528 :
2529 0 : IF (SIZE(B) /= 0) THEN
2530 0 : IF (PRESENT(nrows)) THEN
2531 0 : nrow_global = nrows
2532 : ELSE
2533 0 : nrow_global = SIZE(B, 1)
2534 : END IF
2535 :
2536 0 : IF (PRESENT(ncols)) THEN
2537 0 : ncol_global = ncols
2538 : ELSE
2539 0 : ncol_global = SIZE(B, 2)
2540 : END IF
2541 :
2542 0 : IF (PRESENT(start_row)) THEN
2543 0 : start_row_global = start_row
2544 : ELSE
2545 : start_row_global = 1
2546 : END IF
2547 :
2548 0 : IF (PRESENT(start_col)) THEN
2549 0 : start_col_global = start_col
2550 : ELSE
2551 : start_col_global = 1
2552 : END IF
2553 :
2554 0 : end_row_global = start_row_global + nrow_global - 1
2555 0 : end_col_global = start_col_global + ncol_global - 1
2556 :
2557 : CALL cp_fm_get_info(matrix=matrix_a, &
2558 : nrow_global=nrow_global, ncol_global=ncol_global, &
2559 : nrow_local=nrow_local, ncol_local=ncol_local, &
2560 0 : row_indices=row_indices, col_indices=col_indices)
2561 0 : IF (end_row_global > nrow_global) THEN
2562 : end_row_global = nrow_global
2563 : END IF
2564 0 : IF (end_col_global > ncol_global) THEN
2565 : end_col_global = ncol_global
2566 : END IF
2567 :
2568 : ! find out row/column indices of locally stored matrix elements that
2569 : ! needs to be copied.
2570 : ! Arrays row_indices and col_indices are assumed to be sorted in
2571 : ! ascending order
2572 0 : DO start_row_local = 1, nrow_local
2573 0 : IF (row_indices(start_row_local) >= start_row_global) EXIT
2574 : END DO
2575 :
2576 0 : DO end_row_local = start_row_local, nrow_local
2577 0 : IF (row_indices(end_row_local) > end_row_global) EXIT
2578 : END DO
2579 0 : end_row_local = end_row_local - 1
2580 :
2581 0 : DO start_col_local = 1, ncol_local
2582 0 : IF (col_indices(start_col_local) >= start_col_global) EXIT
2583 : END DO
2584 :
2585 0 : DO end_col_local = start_col_local, ncol_local
2586 0 : IF (col_indices(end_col_local) > end_col_global) EXIT
2587 : END DO
2588 0 : end_col_local = end_col_local - 1
2589 :
2590 0 : a => matrix_a%local_data
2591 :
2592 0 : this_col = col_indices(start_col_local) - start_col_global + 1
2593 :
2594 0 : B(:, this_col) = a(:, start_col_local)
2595 :
2596 0 : IF (my_do_norm) THEN
2597 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2598 0 : B(:, this_col) = B(:, this_col)/norm
2599 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2600 : END IF
2601 :
2602 0 : DO i = start_col_local + 1, end_col_local
2603 0 : this_col = col_indices(i) - start_col_global + 1
2604 0 : B(:, this_col) = a(:, i)
2605 0 : DO j = start_col_local, i - 1
2606 0 : j_col = col_indices(j) - start_col_global + 1
2607 : B(:, this_col) = B(:, this_col) - &
2608 : accurate_dot_product(B(:, j_col), B(:, this_col))* &
2609 0 : B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
2610 : END DO
2611 :
2612 0 : IF (my_do_norm) THEN
2613 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2614 0 : B(:, this_col) = B(:, this_col)/norm
2615 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2616 : END IF
2617 :
2618 : END DO
2619 0 : CALL matrix_a%matrix_struct%para_env%sum(B)
2620 : END IF
2621 :
2622 0 : CALL timestop(handle)
2623 :
2624 0 : END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
2625 :
2626 : ! **************************************************************************************************
2627 : !> \brief Cholesky decomposition
2628 : !> \param fm_matrix ...
2629 : !> \param n ...
2630 : !> \param uplo triangular format; defaults to 'U'
2631 : ! **************************************************************************************************
2632 10015 : SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
2633 : TYPE(cp_fm_type) :: fm_matrix
2634 : INTEGER, INTENT(IN) :: n
2635 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2636 :
2637 : CHARACTER :: myuplo
2638 : INTEGER :: info
2639 10015 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2640 10015 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2641 : #if defined(__parallel)
2642 : INTEGER, DIMENSION(9) :: desca
2643 : #endif
2644 :
2645 10015 : myuplo = 'U'
2646 0 : IF (PRESENT(uplo)) myuplo = uplo
2647 :
2648 10015 : a => fm_matrix%local_data
2649 10015 : a_sp => fm_matrix%local_data_sp
2650 : #if defined(__parallel)
2651 100150 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2652 10015 : IF (fm_matrix%use_sp) THEN
2653 0 : CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
2654 : ELSE
2655 10015 : CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
2656 : END IF
2657 : #else
2658 : IF (fm_matrix%use_sp) THEN
2659 : CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
2660 : ELSE
2661 : CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
2662 : END IF
2663 : #endif
2664 10015 : IF (info /= 0) &
2665 0 : CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
2666 :
2667 10015 : END SUBROUTINE cp_fm_potrf
2668 :
2669 : ! **************************************************************************************************
2670 : !> \brief Invert trianguar matrix
2671 : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
2672 : !> \param n size of the matrix to invert
2673 : !> \param uplo triangular format; defaults to 'U'
2674 : ! **************************************************************************************************
2675 9239 : SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
2676 : TYPE(cp_fm_type) :: fm_matrix
2677 : INTEGER, INTENT(IN) :: n
2678 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2679 :
2680 : CHARACTER :: myuplo
2681 9239 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2682 9239 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2683 : INTEGER :: info
2684 : #if defined(__parallel)
2685 : INTEGER, DIMENSION(9) :: desca
2686 : #endif
2687 :
2688 9239 : myuplo = 'U'
2689 0 : IF (PRESENT(uplo)) myuplo = uplo
2690 :
2691 9239 : a => fm_matrix%local_data
2692 9239 : a_sp => fm_matrix%local_data_sp
2693 : #if defined(__parallel)
2694 92390 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2695 9239 : IF (fm_matrix%use_sp) THEN
2696 0 : CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
2697 : ELSE
2698 9239 : CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
2699 : END IF
2700 : #else
2701 : IF (fm_matrix%use_sp) THEN
2702 : CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
2703 : ELSE
2704 : CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
2705 : END IF
2706 : #endif
2707 9239 : CPASSERT(info == 0)
2708 9239 : END SUBROUTINE cp_fm_potri
2709 :
2710 : ! **************************************************************************************************
2711 : !> \brief ...
2712 : !> \param fm_matrix ...
2713 : !> \param neig ...
2714 : !> \param fm_matrixb ...
2715 : !> \param fm_matrixout ...
2716 : !> \param op ...
2717 : !> \param pos ...
2718 : !> \param transa ...
2719 : ! **************************************************************************************************
2720 1184 : SUBROUTINE cp_fm_cholesky_restore(fm_matrix, neig, fm_matrixb, fm_matrixout, op, pos, transa)
2721 : TYPE(cp_fm_type) :: fm_matrix
2722 : TYPE(cp_fm_type) :: fm_matrixb
2723 : TYPE(cp_fm_type) :: fm_matrixout
2724 : INTEGER, INTENT(IN) :: neig
2725 : CHARACTER(LEN=*), INTENT(IN) :: op
2726 : CHARACTER(LEN=*), INTENT(IN) :: pos
2727 : CHARACTER(LEN=*), INTENT(IN) :: transa
2728 :
2729 1184 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, outm
2730 1184 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, outm_sp
2731 : INTEGER :: n, itype
2732 : REAL(KIND=dp) :: alpha
2733 : #if defined(__parallel)
2734 : INTEGER :: i
2735 : INTEGER, DIMENSION(9) :: desca, descb, descout
2736 : #endif
2737 :
2738 : ! notice b is the cholesky guy
2739 1184 : a => fm_matrix%local_data
2740 1184 : b => fm_matrixb%local_data
2741 1184 : outm => fm_matrixout%local_data
2742 1184 : a_sp => fm_matrix%local_data_sp
2743 1184 : b_sp => fm_matrixb%local_data_sp
2744 1184 : outm_sp => fm_matrixout%local_data_sp
2745 :
2746 1184 : n = fm_matrix%matrix_struct%nrow_global
2747 1184 : itype = 1
2748 :
2749 : #if defined(__parallel)
2750 11840 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2751 11840 : descb(:) = fm_matrixb%matrix_struct%descriptor(:)
2752 11840 : descout(:) = fm_matrixout%matrix_struct%descriptor(:)
2753 1184 : alpha = 1.0_dp
2754 5316 : DO i = 1, neig
2755 5316 : IF (fm_matrix%use_sp) THEN
2756 0 : CALL pscopy(n, a_sp(1, 1), 1, i, desca, 1, outm_sp(1, 1), 1, i, descout, 1)
2757 : ELSE
2758 4132 : CALL pdcopy(n, a(1, 1), 1, i, desca, 1, outm(1, 1), 1, i, descout, 1)
2759 : END IF
2760 : END DO
2761 1184 : IF (op .EQ. "SOLVE") THEN
2762 1184 : IF (fm_matrix%use_sp) THEN
2763 : CALL pstrsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
2764 0 : outm_sp(1, 1), 1, 1, descout)
2765 : ELSE
2766 1184 : CALL pdtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
2767 : END IF
2768 : ELSE
2769 0 : IF (fm_matrix%use_sp) THEN
2770 : CALL pstrmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), 1, 1, descb, &
2771 0 : outm_sp(1, 1), 1, 1, descout)
2772 : ELSE
2773 0 : CALL pdtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), 1, 1, descb, outm(1, 1), 1, 1, descout)
2774 : END IF
2775 : END IF
2776 : #else
2777 : alpha = 1.0_dp
2778 : IF (fm_matrix%use_sp) THEN
2779 : CALL scopy(neig*n, a_sp(1, 1), 1, outm_sp(1, 1), 1)
2780 : ELSE
2781 : CALL dcopy(neig*n, a(1, 1), 1, outm(1, 1), 1)
2782 : END IF
2783 : IF (op .EQ. "SOLVE") THEN
2784 : IF (fm_matrix%use_sp) THEN
2785 : CALL strsm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), SIZE(b_sp, 1), outm_sp(1, 1), n)
2786 : ELSE
2787 : CALL dtrsm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), SIZE(b, 1), outm(1, 1), n)
2788 : END IF
2789 : ELSE
2790 : IF (fm_matrix%use_sp) THEN
2791 : CALL strmm(pos, 'U', transa, 'N', n, neig, REAL(alpha, sp), b_sp(1, 1), n, outm_sp(1, 1), n)
2792 : ELSE
2793 : CALL dtrmm(pos, 'U', transa, 'N', n, neig, alpha, b(1, 1), n, outm(1, 1), n)
2794 : END IF
2795 : END IF
2796 : #endif
2797 :
2798 1184 : END SUBROUTINE cp_fm_cholesky_restore
2799 :
2800 : ! **************************************************************************************************
2801 : !> \brief Calculates
2802 : !> yv = alpha*amat*xv + beta*yv
2803 : !> where amat: fm matrix
2804 : !> xv : vector replicated
2805 : !> yv : vector replicated
2806 : !> Defaults: alpha = 1, beta = 0
2807 : ! **************************************************************************************************
2808 16880 : SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
2809 : TYPE(cp_fm_type), INTENT(IN) :: amat
2810 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: xv
2811 : REAL(KIND=dp), DIMENSION(:), INTENT(INOUT) :: yv
2812 : REAL(KIND=dp), OPTIONAL, INTENT(IN) :: alpha, beta
2813 :
2814 : INTEGER :: na, nc, nx, ny
2815 : REAL(KIND=dp) :: aval, bval
2816 : #if defined(__parallel)
2817 : INTEGER :: nrl, ncl, ic, ir
2818 16880 : INTEGER, DIMENSION(:), POINTER :: rind, cind
2819 16880 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
2820 : #endif
2821 :
2822 16880 : IF (amat%use_sp) THEN
2823 0 : CPABORT("cp_fm_matvec: SP option not available")
2824 : END IF
2825 16880 : aval = 1.0_dp
2826 16880 : IF (PRESENT(alpha)) aval = alpha
2827 16880 : bval = 0.0_dp
2828 16880 : IF (PRESENT(beta)) bval = beta
2829 :
2830 16880 : CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
2831 16880 : nx = SIZE(xv)
2832 16880 : ny = SIZE(yv)
2833 16880 : IF ((nx /= ny) .OR. (nc /= nx)) THEN
2834 0 : CPABORT("cp_fm_matvec: incompatible dimensions")
2835 : END IF
2836 : #if defined(__parallel)
2837 : CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
2838 16880 : row_indices=rind, col_indices=cind)
2839 118160 : ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
2840 147680 : DO ic = 1, ncl
2841 147680 : xvl(ic) = xv(cind(ic))
2842 : END DO
2843 1665560 : yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
2844 147680 : yvm = 0.0_dp
2845 82280 : DO ir = 1, nrl
2846 82280 : yvm(rind(ir)) = yvl(ir)
2847 : END DO
2848 16880 : CALL amat%matrix_struct%para_env%sum(yvm)
2849 16880 : IF (bval == 0.0_dp) THEN
2850 73840 : yv = aval*yvm
2851 : ELSE
2852 73840 : yv = bval*yv + aval*yvm
2853 : END IF
2854 : #else
2855 : IF (bval == 0.0_dp) THEN
2856 : yv = aval*MATMUL(amat%local_data, xv)
2857 : ELSE
2858 : yv = bval*yv + aval*MATMUL(amat%local_data, xv)
2859 : END IF
2860 : #endif
2861 :
2862 67520 : END SUBROUTINE cp_fm_matvec
2863 :
2864 : END MODULE cp_fm_basic_linalg
|