Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief methods for arnoldi iteration
10 : !> \par History
11 : !> 2014.09 created [Florian Schiffmann]
12 : !> \author Florian Schiffmann
13 : ! **************************************************************************************************
14 :
15 : #:include 'arnoldi.fypp'
16 : MODULE arnoldi_methods
17 : USE arnoldi_geev, ONLY: arnoldi_general_local_diag, &
18 : arnoldi_symm_local_diag, &
19 : arnoldi_tridiag_local_diag
20 : USE arnoldi_types, ONLY: &
21 : arnoldi_control_type, arnoldi_data_c_type, arnoldi_data_d_type, arnoldi_data_s_type, &
22 : arnoldi_data_type, arnoldi_data_z_type, get_control, get_data_c, get_data_d, get_data_s, &
23 : get_data_z, has_d_cmplx, has_d_real, m_x_v_vectors_type
24 : USE cp_dbcsr_api, ONLY: &
25 : dbcsr_add, dbcsr_copy, dbcsr_get_data_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &
26 : dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
27 : dbcsr_p_type, dbcsr_scale, dbcsr_type
28 : USE dbcsr_vector, ONLY: dbcsr_matrix_colvec_multiply
29 : USE kinds, ONLY: real_8
30 : USE message_passing, ONLY: mp_comm_type
31 : #include "../base/base_uses.f90"
32 :
33 : IMPLICIT NONE
34 :
35 : PRIVATE
36 :
37 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_methods'
38 :
39 : PUBLIC :: arnoldi_init, build_subspace, compute_evals, arnoldi_iram, &
40 : gev_arnoldi_init, gev_build_subspace, gev_update_data
41 :
42 : INTERFACE convert_matrix
43 : MODULE PROCEDURE convert_matrix_z_to_d
44 : MODULE PROCEDURE convert_matrix_d_to_z
45 : MODULE PROCEDURE convert_matrix_z_to_z
46 : END INTERFACE
47 :
48 : CONTAINS
49 :
50 : ! **************************************************************************************************
51 : !> \brief Interface for the routine calcualting the implicit restarts
52 : !> Currently all based on lapack
53 : !> \param arnoldi_data ...
54 : ! **************************************************************************************************
55 250 : SUBROUTINE arnoldi_iram(arnoldi_data)
56 : TYPE(arnoldi_data_type) :: arnoldi_data
57 :
58 : CHARACTER(LEN=*), PARAMETER :: routineN = 'arnoldi_iram'
59 :
60 : INTEGER :: handle
61 :
62 250 : CALL timeset(routineN, handle)
63 :
64 250 : IF (has_d_real(arnoldi_data)) CALL arnoldi_iram_d(arnoldi_data)
65 250 : IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_iram_z(arnoldi_data)
66 :
67 250 : CALL timestop(handle)
68 :
69 250 : END SUBROUTINE arnoldi_iram
70 :
71 : ! **************************************************************************************************
72 : !> \brief Interface to compute the eigenvalues of a nonsymmetric matrix
73 : !> This is only the serial version
74 : !> \param arnoldi_data ...
75 : ! **************************************************************************************************
76 129914 : SUBROUTINE compute_evals(arnoldi_data)
77 : TYPE(arnoldi_data_type) :: arnoldi_data
78 :
79 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_evals'
80 :
81 : INTEGER :: handle
82 :
83 129914 : CALL timeset(routineN, handle)
84 :
85 129914 : IF (has_d_real(arnoldi_data)) CALL compute_evals_d(arnoldi_data)
86 129914 : IF (has_d_cmplx(arnoldi_data)) CALL compute_evals_z(arnoldi_data)
87 :
88 129914 : CALL timestop(handle)
89 :
90 129914 : END SUBROUTINE compute_evals
91 :
92 : ! **************************************************************************************************
93 : !> \brief Interface for the initialization of the arnoldi subspace creation
94 : !> currently it can only setup a random vector but can be improved to
95 : !> various types of restarts easily
96 : !> \param matrix pointer to the matrices as described in main interface
97 : !> \param vectors work vectors for the matrix vector multiplications
98 : !> \param arnoldi_data all data concerning the subspace
99 : ! **************************************************************************************************
100 125059 : SUBROUTINE arnoldi_init(matrix, vectors, arnoldi_data)
101 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
102 : TYPE(m_x_v_vectors_type) :: vectors
103 : TYPE(arnoldi_data_type) :: arnoldi_data
104 :
105 : CHARACTER(LEN=*), PARAMETER :: routineN = 'arnoldi_init'
106 :
107 : INTEGER :: handle
108 :
109 125059 : CALL timeset(routineN, handle)
110 :
111 125059 : IF (has_d_real(arnoldi_data)) CALL arnoldi_init_d(matrix, vectors, arnoldi_data)
112 125059 : IF (has_d_cmplx(arnoldi_data)) CALL arnoldi_init_z(matrix, vectors, arnoldi_data)
113 :
114 125059 : CALL timestop(handle)
115 :
116 125059 : END SUBROUTINE arnoldi_init
117 :
118 : ! **************************************************************************************************
119 : !> \brief Interface for the initialization of the arnoldi subspace creation
120 : !> for the generalized eigenvalue problem
121 : !> \param matrix pointer to the matrices as described in main interface
122 : !> \param matrix_arnoldi ...
123 : !> \param vectors work vectors for the matrix vector multiplications
124 : !> \param arnoldi_data all data concerning the subspace
125 : ! **************************************************************************************************
126 3728 : SUBROUTINE gev_arnoldi_init(matrix, matrix_arnoldi, vectors, arnoldi_data)
127 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix, matrix_arnoldi
128 : TYPE(m_x_v_vectors_type) :: vectors
129 : TYPE(arnoldi_data_type) :: arnoldi_data
130 :
131 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_arnoldi_init'
132 :
133 : INTEGER :: handle
134 :
135 3728 : CALL timeset(routineN, handle)
136 :
137 3728 : IF (has_d_real(arnoldi_data)) CALL gev_arnoldi_init_d(matrix, matrix_arnoldi, vectors, arnoldi_data)
138 3728 : IF (has_d_cmplx(arnoldi_data)) CALL gev_arnoldi_init_z(matrix, matrix_arnoldi, vectors, arnoldi_data)
139 :
140 3728 : CALL timestop(handle)
141 :
142 3728 : END SUBROUTINE gev_arnoldi_init
143 :
144 : ! **************************************************************************************************
145 : !> \brief here the iterations are performed and the krylov space is constructed
146 : !> \param matrix see above
147 : !> \param vectors see above
148 : !> \param arnoldi_data see above
149 : ! **************************************************************************************************
150 125309 : SUBROUTINE build_subspace(matrix, vectors, arnoldi_data)
151 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
152 : TYPE(m_x_v_vectors_type) :: vectors
153 : TYPE(arnoldi_data_type) :: arnoldi_data
154 :
155 : CHARACTER(LEN=*), PARAMETER :: routineN = 'build_subspace'
156 :
157 : INTEGER :: handle
158 :
159 125309 : CALL timeset(routineN, handle)
160 :
161 125309 : IF (has_d_real(arnoldi_data)) CALL build_subspace_d(matrix, vectors, arnoldi_data)
162 125309 : IF (has_d_cmplx(arnoldi_data)) CALL build_subspace_z(matrix, vectors, arnoldi_data)
163 :
164 125309 : CALL timestop(handle)
165 :
166 125309 : END SUBROUTINE build_subspace
167 :
168 : ! **************************************************************************************************
169 : !> \brief here the iterations are performed and the krylov space for the generalized
170 : !> eigenvalue problem is created
171 : !> \param matrix see above
172 : !> \param vectors see above
173 : !> \param arnoldi_data see above
174 : ! **************************************************************************************************
175 4605 : SUBROUTINE gev_build_subspace(matrix, vectors, arnoldi_data)
176 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
177 : TYPE(m_x_v_vectors_type) :: vectors
178 : TYPE(arnoldi_data_type) :: arnoldi_data
179 :
180 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_build_subspace'
181 :
182 : INTEGER :: handle
183 :
184 4605 : CALL timeset(routineN, handle)
185 :
186 4605 : IF (has_d_real(arnoldi_data)) CALL gev_build_subspace_d(matrix, vectors, arnoldi_data)
187 4605 : IF (has_d_cmplx(arnoldi_data)) CALL gev_build_subspace_z(matrix, vectors, arnoldi_data)
188 :
189 4605 : CALL timestop(handle)
190 :
191 4605 : END SUBROUTINE gev_build_subspace
192 :
193 : ! **************************************************************************************************
194 : !> \brief in the generalized eigenvalue the matrix depende on the projection
195 : !> therefore the outer loop has to build a new set of matrices for the
196 : !> inner loop
197 : !> \param matrix see above
198 : !> \param matrix_arnoldi ...
199 : !> \param vectors ...
200 : !> \param arnoldi_data see above
201 : ! **************************************************************************************************
202 4605 : SUBROUTINE gev_update_data(matrix, matrix_arnoldi, vectors, arnoldi_data)
203 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix, matrix_arnoldi
204 : TYPE(m_x_v_vectors_type) :: vectors
205 : TYPE(arnoldi_data_type) :: arnoldi_data
206 :
207 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gev_update_data'
208 :
209 : INTEGER :: handle
210 :
211 4605 : CALL timeset(routineN, handle)
212 :
213 4605 : IF (has_d_real(arnoldi_data)) CALL gev_update_data_d(matrix, matrix_arnoldi, vectors, arnoldi_data)
214 4605 : IF (has_d_cmplx(arnoldi_data)) CALL gev_update_data_z(matrix, matrix_arnoldi, vectors, arnoldi_data)
215 :
216 4605 : CALL timestop(handle)
217 :
218 4605 : END SUBROUTINE gev_update_data
219 :
220 : ! **************************************************************************************************
221 : !> \brief ...
222 : !> \param m_out ...
223 : !> \param m_in ...
224 : ! **************************************************************************************************
225 500 : SUBROUTINE convert_matrix_z_to_d(m_out, m_in)
226 : REAL(real_8), DIMENSION(:, :) :: m_out
227 : COMPLEX(real_8), DIMENSION(:, :) :: m_in
228 :
229 451828 : m_out(:, :) = REAL(m_in(:, :), KIND=real_8)
230 500 : END SUBROUTINE convert_matrix_z_to_d
231 :
232 : ! **************************************************************************************************
233 : !> \brief ...
234 : !> \param m_out ...
235 : !> \param m_in ...
236 : ! **************************************************************************************************
237 250 : SUBROUTINE convert_matrix_d_to_z(m_out, m_in)
238 : COMPLEX(real_8), DIMENSION(:, :) :: m_out
239 : REAL(real_8), DIMENSION(:, :) :: m_in
240 :
241 225914 : m_out(:, :) = CMPLX(m_in, 0.0, KIND=real_8)
242 250 : END SUBROUTINE convert_matrix_d_to_z
243 :
244 : ! I kno that one is stupid but like this it simplifies the template
245 : ! **************************************************************************************************
246 : !> \brief ...
247 : !> \param m_out ...
248 : !> \param m_in ...
249 : ! **************************************************************************************************
250 0 : SUBROUTINE convert_matrix_z_to_z(m_out, m_in)
251 : COMPLEX(real_8), DIMENSION(:, :) :: m_out, m_in
252 :
253 0 : m_out(:, :) = m_in
254 0 : END SUBROUTINE convert_matrix_z_to_z
255 :
256 : #:for nametype1, type_prec, type_nametype1, nametype_zero, nametype_one, nametype_negone, czero, cone, ctype, rnorm_to_norm, val_to_type in inst_params_2
257 : ! **************************************************************************************************
258 : !> \brief Call the correct eigensolver, in the arnoldi method only the right
259 : !> eigenvectors are used. Lefts are created here but dumped immediately
260 : !> \param arnoldi_data ...
261 : ! **************************************************************************************************
262 129914 : SUBROUTINE compute_evals_${nametype1}$ (arnoldi_data)
263 : TYPE(arnoldi_data_type) :: arnoldi_data
264 :
265 : COMPLEX(${type_prec}$), DIMENSION(:, :), ALLOCATABLE :: levec
266 : TYPE(arnoldi_data_${nametype1}$_type), POINTER :: ar_data
267 : INTEGER :: ndim
268 : TYPE(arnoldi_control_type), POINTER :: control
269 :
270 129914 : ar_data => get_data_${nametype1}$ (arnoldi_data)
271 129914 : control => get_control(arnoldi_data)
272 129914 : ndim = control%current_step
273 519656 : ALLOCATE (levec(ndim, ndim))
274 :
275 : ! Needs antoher interface as the calls to real and complex geev differ (sucks!)
276 : ! only perform the diagonalization on processors which hold data
277 129914 : IF (control%generalized_ev) THEN
278 : CALL arnoldi_symm_local_diag('V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
279 4605 : ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim))
280 : ELSE
281 125309 : IF (control%symmetric) THEN
282 : CALL arnoldi_tridiag_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
283 8650 : ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
284 : ELSE
285 : CALL arnoldi_general_local_diag('N', 'V', ar_data%Hessenberg(1:ndim, 1:ndim), ndim, &
286 116659 : ar_data%evals(1:ndim), ar_data%revec(1:ndim, 1:ndim), levec)
287 : END IF
288 : END IF
289 :
290 129914 : DEALLOCATE (levec)
291 :
292 129914 : END SUBROUTINE compute_evals_${nametype1}$
293 :
294 : ! **************************************************************************************************
295 : !> \brief Initialization of the arnoldi vector. Here a random vector is used,
296 : !> might be generalized in the future
297 : !> \param matrix ...
298 : !> \param vectors ...
299 : !> \param arnoldi_data ...
300 : ! **************************************************************************************************
301 125059 : SUBROUTINE arnoldi_init_${nametype1}$ (matrix, vectors, arnoldi_data)
302 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
303 : TYPE(m_x_v_vectors_type) :: vectors
304 : TYPE(arnoldi_data_type) :: arnoldi_data
305 :
306 : INTEGER :: i, iseed(4), row_size, col_size, &
307 : nrow_local, ncol_local, &
308 : row, col
309 : REAL(${type_prec}$) :: rnorm
310 : TYPE(dbcsr_iterator_type) :: iter
311 : ${type_nametype1}$ :: norm
312 : ${type_nametype1}$, DIMENSION(:), ALLOCATABLE :: &
313 : v_vec, w_vec
314 125059 : ${type_nametype1}$, DIMENSION(:), POINTER :: data_vec
315 : LOGICAL :: local_comp
316 : TYPE(arnoldi_data_${nametype1}$_type), POINTER :: ar_data
317 : TYPE(arnoldi_control_type), POINTER :: control
318 : TYPE(mp_comm_type) :: pcol_group
319 :
320 250118 : control => get_control(arnoldi_data)
321 125059 : pcol_group = control%pcol_group
322 125059 : local_comp = control%local_comp
323 :
324 125059 : ar_data => get_data_${nametype1}$ (arnoldi_data)
325 :
326 : ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
327 125059 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
328 331516 : ALLOCATE (v_vec(nrow_local))
329 206457 : ALLOCATE (w_vec(nrow_local))
330 1542391 : v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
331 135044179 : ar_data%Hessenberg = ${nametype_zero}$
332 :
333 125059 : IF (control%has_initial_vector) THEN
334 : ! after calling the set routine the initial vector is stored in f_vec
335 2735 : CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
336 : ELSE
337 : ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
338 122324 : CALL dbcsr_iterator_start(iter, vectors%input_vec)
339 226894 : DO WHILE (dbcsr_iterator_blocks_left(iter))
340 104570 : CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
341 104570 : iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
342 226894 : CALL ${nametype1}$larnv(2, iseed, row_size*col_size, data_vec)
343 : END DO
344 122324 : CALL dbcsr_iterator_stop(iter)
345 : END IF
346 :
347 125059 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
348 :
349 : ! compute the vector norm of the random vectorm, get it real valued as well (rnorm)
350 125059 : CALL compute_norms_${nametype1}$ (v_vec, norm, rnorm, control%pcol_group)
351 :
352 125059 : IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
353 125059 : CALL dbcsr_scale(vectors%input_vec, REAL(1.0, ${type_prec}$)/rnorm)
354 :
355 : ! Everything prepared, initialize the Arnoldi iteration
356 125059 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
357 :
358 : ! This permits to compute the subspace of a matrix which is a product of multiple matrices
359 250118 : DO i = 1, SIZE(matrix)
360 : CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
361 125059 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
362 250118 : CALL dbcsr_copy(vectors%input_vec, vectors%result_vec)
363 : END DO
364 :
365 125059 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, w_vec, nrow_local, control%local_comp)
366 :
367 : ! Put the projection into the Hessenberg matrix, and make the vectors orthonormal
368 833725 : ar_data%Hessenberg(1, 1) = DOT_PRODUCT(v_vec, w_vec)
369 125059 : CALL pcol_group%sum(ar_data%Hessenberg(1, 1))
370 833725 : ar_data%f_vec = w_vec - v_vec*ar_data%Hessenberg(1, 1)
371 :
372 833725 : ar_data%local_history(:, 1) = v_vec(:)
373 :
374 : ! We did the first step in here so we should set the current step for the subspace generation accordingly
375 125059 : control%current_step = 1
376 :
377 125059 : DEALLOCATE (v_vec, w_vec)
378 :
379 250118 : END SUBROUTINE arnoldi_init_${nametype1}$
380 :
381 : ! **************************************************************************************************
382 : !> \brief Alogorithm for the implicit restarts in the arnoldi method
383 : !> this is an early implementation which scales subspace size^4
384 : !> by replacing the lapack calls with direct math the
385 : !> QR and gemms can be made linear and a N^2 sacling will be acchieved
386 : !> however this already sets the framework but should be used with care
387 : !> \param arnoldi_data ...
388 : ! **************************************************************************************************
389 250 : SUBROUTINE arnoldi_iram_${nametype1}$ (arnoldi_data)
390 : TYPE(arnoldi_data_type) :: arnoldi_data
391 :
392 : TYPE(arnoldi_data_${nametype1}$_type), POINTER :: ar_data
393 : TYPE(arnoldi_control_type), POINTER :: control
394 250 : COMPLEX(${type_prec}$), DIMENSION(:, :), ALLOCATABLE :: tmp_mat, safe_mat, Q, tmp_mat1
395 250 : COMPLEX(${type_prec}$), DIMENSION(:), ALLOCATABLE :: work, tau, work_measure
396 : INTEGER :: msize, lwork, i, j, info, nwant
397 : ${type_nametype1}$ :: beta, sigma
398 : ${type_nametype1}$, DIMENSION(:, :), ALLOCATABLE :: Qdata
399 :
400 : ! This is just a terribly inefficient implementation but I hope it is correct and might serve as a reference
401 250 : ar_data => get_data_${nametype1}$ (arnoldi_data)
402 250 : control => get_control(arnoldi_data)
403 250 : msize = control%current_step
404 250 : nwant = control%nval_out
405 1500 : ALLOCATE (tmp_mat(msize, msize)); ALLOCATE (safe_mat(msize, msize))
406 1250 : ALLOCATE (Q(msize, msize)); ALLOCATE (tmp_mat1(msize, msize))
407 250 : ALLOCATE (work_measure(1))
408 1500 : ALLOCATE (tau(msize)); ALLOCATE (Qdata(msize, msize))
409 : !make Q identity
410 225914 : Q = ${czero}$
411 6778 : DO i = 1, msize
412 6778 : Q(i, i) = ${cone}$
413 : END DO
414 :
415 : ! Looks a bit odd, but safe_mat will contain the result in the end, while tmpmat gets violated by lapack
416 250 : CALL convert_matrix(tmp_mat, ar_data%Hessenberg(1:msize, 1:msize))
417 225914 : safe_mat(:, :) = tmp_mat(:, :)
418 :
419 6778 : DO i = 1, msize
420 : ! A bit a strange check but in the end we only want to shift the unwanted evals
421 212858 : IF (ANY(control%selected_ind == i)) CYCLE
422 : ! Here we shift the matrix by subtracting unwanted evals from the diagonal
423 212108 : DO j = 1, msize
424 212108 : tmp_mat(j, j) = tmp_mat(j, j) - ar_data%evals(i)
425 : END DO
426 : ! Now we repair the damage by QR factorizing
427 6028 : lwork = -1
428 6028 : CALL ${ctype}$geqrf(msize, msize, tmp_mat, msize, tau, work_measure, lwork, info)
429 6028 : lwork = INT(work_measure(1))
430 6028 : IF (ALLOCATED(work)) THEN
431 5778 : IF (SIZE(work) .LT. lwork) THEN
432 0 : DEALLOCATE (work)
433 : END IF
434 : END IF
435 6528 : IF (.NOT. ALLOCATED(work)) ALLOCATE (work(lwork))
436 6028 : CALL ${ctype}$geqrf(msize, msize, tmp_mat, msize, tau, work, lwork, info)
437 : ! Ask Lapack to reconstruct Q from its own way of storing data (tmpmat will contain Q)
438 6028 : CALL ${ctype}$ungqr(msize, msize, msize, tmp_mat, msize, tau, work, lwork, info)
439 : ! update Q=Q*Q_current
440 9112716 : tmp_mat1(:, :) = Q(:, :)
441 : CALL ${ctype}$gemm('N', 'N', msize, msize, msize, ${cone}$, tmp_mat1, msize, tmp_mat, msize, ${czero}$, &
442 6028 : Q, msize)
443 : ! Update H=(Q*)HQ
444 : CALL ${ctype}$gemm('C', 'N', msize, msize, msize, ${cone}$, tmp_mat, msize, safe_mat, msize, ${czero}$, &
445 6028 : tmp_mat1, msize)
446 : CALL ${ctype}$gemm('N', 'N', msize, msize, msize, ${cone}$, tmp_mat1, msize, tmp_mat, msize, ${czero}$, &
447 6028 : safe_mat, msize)
448 :
449 : ! this one is crucial for numerics not to accumulate noise in the subdiagonals
450 212108 : DO j = 1, msize
451 4359320 : safe_mat(j + 2:msize, j) = ${czero}$
452 : END DO
453 9113466 : tmp_mat(:, :) = safe_mat(:, :)
454 : END DO
455 :
456 : ! Now we can compute our restart quantities
457 232442 : ar_data%Hessenberg = ${nametype_zero}$
458 250 : CALL convert_matrix(ar_data%Hessenberg(1:msize, 1:msize), safe_mat)
459 250 : CALL convert_matrix(Qdata, Q)
460 :
461 250 : beta = ar_data%Hessenberg(nwant + 1, nwant); sigma = Qdata(msize, nwant)
462 :
463 : !update the residuum and the basis vectors
464 250 : IF (control%local_comp) THEN
465 443564 : ar_data%f_vec = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, nwant + 1))*beta + ar_data%f_vec(:)*sigma
466 1267924 : ar_data%local_history(:, 1:nwant) = MATMUL(ar_data%local_history(:, 1:msize), Qdata(1:msize, 1:nwant))
467 : END IF
468 : ! Set the current step to nwant so the subspace build knows where to start
469 250 : control%current_step = nwant
470 :
471 250 : DEALLOCATE (tmp_mat, safe_mat, Q, Qdata, tmp_mat1, work, tau, work_measure)
472 :
473 250 : END SUBROUTINE arnoldi_iram_${nametype1}$
474 :
475 : ! **************************************************************************************************
476 : !> \brief Here we create the Krylov subspace and fill the Hessenberg matrix
477 : !> convergence check is only performed on subspace convergence
478 : !> Gram Schidt is used to orthonogonalize.
479 : !> If this is numericall not sufficient a Daniel, Gragg, Kaufman and Steward
480 : !> correction is performed
481 : !> \param matrix ...
482 : !> \param vectors ...
483 : !> \param arnoldi_data ...
484 : ! **************************************************************************************************
485 125309 : SUBROUTINE build_subspace_${nametype1}$ (matrix, vectors, arnoldi_data)
486 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
487 : TYPE(m_x_v_vectors_type), TARGET :: vectors
488 : TYPE(arnoldi_data_type) :: arnoldi_data
489 :
490 : INTEGER :: i, j, ncol_local, nrow_local
491 : REAL(${type_prec}$) :: rnorm
492 : TYPE(arnoldi_control_type), POINTER :: control
493 : TYPE(arnoldi_data_${nametype1}$_type), POINTER :: ar_data
494 : ${type_nametype1}$ :: norm
495 : ${type_nametype1}$, ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
496 : TYPE(dbcsr_type), POINTER :: input_vec, result_vec, swap_vec
497 :
498 125309 : ar_data => get_data_${nametype1}$ (arnoldi_data)
499 125309 : control => get_control(arnoldi_data)
500 125309 : control%converged = .FALSE.
501 :
502 : ! create the vectors required during the iterations
503 125309 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
504 495562 : ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
505 1699084 : v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
506 626545 : ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
507 :
508 593008 : DO j = control%current_step, control%max_iter - 1
509 :
510 : ! compute the vector norm of the residuum, get it real valued as well (rnorm)
511 589990 : CALL compute_norms_${nametype1}$ (ar_data%f_vec, norm, rnorm, control%pcol_group)
512 :
513 : ! check convergence and inform everybody about it, a bit annoying to talk to everybody because of that
514 589990 : IF (control%myproc == 0) control%converged = rnorm .LT. REAL(control%threshold, ${type_prec}$)
515 589990 : CALL control%mp_group%bcast(control%converged, 0)
516 589990 : IF (control%converged) EXIT
517 :
518 : ! transfer normalized residdum to history and its norm to the Hessenberg matrix
519 467699 : IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
520 12750653 : v_vec(:) = ar_data%f_vec(:)/rnorm; ar_data%local_history(:, j + 1) = v_vec(:); ar_data%Hessenberg(j + 1, j) = norm
521 :
522 467699 : input_vec => vectors%input_vec
523 467699 : result_vec => vectors%result_vec
524 467699 : CALL transfer_local_array_to_dbcsr_${nametype1}$ (input_vec, v_vec, nrow_local, control%local_comp)
525 :
526 : ! This permits to compute the subspace of a matrix which is a product of two matrices
527 935398 : DO i = 1, SIZE(matrix)
528 : CALL dbcsr_matrix_colvec_multiply(matrix(i)%matrix, input_vec, result_vec, ${nametype_one}$, &
529 467699 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
530 467699 : swap_vec => input_vec
531 467699 : input_vec => result_vec
532 935398 : result_vec => swap_vec
533 : END DO
534 :
535 467699 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (input_vec, w_vec, nrow_local, control%local_comp)
536 :
537 : ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
538 : CALL Gram_Schmidt_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j + 1, &
539 467699 : ar_data%local_history, ar_data%local_history, control%local_comp, control%pcol_group)
540 :
541 : ! A bit more expensive but simply always top up with a DGKS correction, otherwise numerics
542 : ! can become a problem later on, there is probably a good check whether it's necessary, but we don't perform it
543 : CALL DGKS_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, nrow_local, j + 1, ar_data%local_history, &
544 467699 : ar_data%local_history, control%local_comp, control%pcol_group)
545 : ! Finally we can put the projections into our Hessenberg matrix
546 4440024 : ar_data%Hessenberg(1:j + 1, j + 1) = h_vec(1:j + 1)
547 593008 : control%current_step = j + 1
548 : END DO
549 :
550 : ! compute the vector norm of the final residuum and put it in to Hessenberg
551 125309 : CALL compute_norms_${nametype1}$ (ar_data%f_vec, norm, rnorm, control%pcol_group)
552 125309 : ar_data%Hessenberg(control%current_step + 1, control%current_step) = norm
553 :
554 : ! broadcast the Hessenberg matrix so we don't need to care later on
555 270427933 : CALL control%mp_group%bcast(ar_data%Hessenberg, 0)
556 :
557 125309 : DEALLOCATE (v_vec, w_vec, h_vec, s_vec)
558 :
559 125309 : END SUBROUTINE build_subspace_${nametype1}$
560 :
561 : ! **************************************************************************************************
562 : !> \brief Helper routine to transfer the all data of a dbcsr matrix to a local array
563 : !> \param vec ...
564 : !> \param array ...
565 : !> \param n ...
566 : !> \param is_local ...
567 : ! **************************************************************************************************
568 1016516 : SUBROUTINE transfer_dbcsr_to_local_array_${nametype1}$ (vec, array, n, is_local)
569 : TYPE(dbcsr_type) :: vec
570 : ${type_nametype1}$, DIMENSION(:) :: array
571 : INTEGER :: n
572 : LOGICAL :: is_local
573 1016516 : ${type_nametype1}$, DIMENSION(:), POINTER :: data_vec
574 :
575 2033032 : data_vec => dbcsr_get_data_p(vec, select_data_type=${nametype_zero}$)
576 13532709 : IF (is_local) array(1:n) = data_vec(1:n)
577 :
578 1016516 : END SUBROUTINE transfer_dbcsr_to_local_array_${nametype1}$
579 :
580 : ! **************************************************************************************************
581 : !> \brief The inverse routine transferring data back from an array to a dbcsr
582 : !> \param vec ...
583 : !> \param array ...
584 : !> \param n ...
585 : !> \param is_local ...
586 : ! **************************************************************************************************
587 634941 : SUBROUTINE transfer_local_array_to_dbcsr_${nametype1}$ (vec, array, n, is_local)
588 : TYPE(dbcsr_type) :: vec
589 : ${type_nametype1}$, DIMENSION(:) :: array
590 : INTEGER :: n
591 : LOGICAL :: is_local
592 634941 : ${type_nametype1}$, DIMENSION(:), POINTER :: data_vec
593 :
594 1269882 : data_vec => dbcsr_get_data_p(vec, select_data_type=${nametype_zero}$)
595 10911307 : IF (is_local) data_vec(1:n) = array(1:n)
596 :
597 634941 : END SUBROUTINE transfer_local_array_to_dbcsr_${nametype1}$
598 :
599 : ! **************************************************************************************************
600 : !> \brief Gram-Schmidt in matrix vector form
601 : !> \param h_vec ...
602 : !> \param f_vec ...
603 : !> \param s_vec ...
604 : !> \param w_vec ...
605 : !> \param nrow_local ...
606 : !> \param j ...
607 : !> \param local_history ...
608 : !> \param reorth_mat ...
609 : !> \param local_comp ...
610 : !> \param pcol_group ...
611 : ! **************************************************************************************************
612 544322 : SUBROUTINE Gram_Schmidt_ortho_${nametype1}$ (h_vec, f_vec, s_vec, w_vec, nrow_local, &
613 544322 : j, local_history, reorth_mat, local_comp, pcol_group)
614 : ${type_nametype1}$, DIMENSION(:) :: h_vec, s_vec, f_vec, w_vec
615 : ${type_nametype1}$, DIMENSION(:, :) :: local_history, reorth_mat
616 : INTEGER :: nrow_local, j
617 : TYPE(mp_comm_type), INTENT(IN) :: pcol_group
618 : LOGICAL :: local_comp
619 :
620 : CHARACTER(LEN=*), PARAMETER :: routineN = 'Gram_Schmidt_ortho_${nametype1}$'
621 : INTEGER :: handle
622 :
623 544322 : CALL timeset(routineN, handle)
624 :
625 : ! Let's do the orthonormalization, first try the Gram Schmidt scheme
626 42880751 : h_vec = ${nametype_zero}$; f_vec = ${nametype_zero}$; s_vec = ${nametype_zero}$
627 544322 : IF (local_comp) CALL ${nametype1}$gemv('T', nrow_local, j, ${nametype_one}$, local_history, &
628 460645 : nrow_local, w_vec, 1, ${nametype_zero}$, h_vec, 1)
629 9913372 : CALL pcol_group%sum(h_vec(1:j))
630 :
631 544322 : IF (local_comp) CALL ${nametype1}$gemv('N', nrow_local, j, ${nametype_one}$, reorth_mat, &
632 460645 : nrow_local, h_vec, 1, ${nametype_zero}$, f_vec, 1)
633 8609339 : f_vec(:) = w_vec(:) - f_vec(:)
634 :
635 544322 : CALL timestop(handle)
636 :
637 544322 : END SUBROUTINE Gram_Schmidt_ortho_${nametype1}$
638 :
639 : ! **************************************************************************************************
640 : !> \brief Compute the Daniel, Gragg, Kaufman and Steward correction
641 : !> \param h_vec ...
642 : !> \param f_vec ...
643 : !> \param s_vec ...
644 : !> \param nrow_local ...
645 : !> \param j ...
646 : !> \param local_history ...
647 : !> \param reorth_mat ...
648 : !> \param local_comp ...
649 : !> \param pcol_group ...
650 : ! **************************************************************************************************
651 544322 : SUBROUTINE DGKS_ortho_${nametype1}$ (h_vec, f_vec, s_vec, nrow_local, j, &
652 1088644 : local_history, reorth_mat, local_comp, pcol_group)
653 : ${type_nametype1}$, DIMENSION(:) :: h_vec, s_vec, f_vec
654 : ${type_nametype1}$, DIMENSION(:, :) :: local_history, reorth_mat
655 : INTEGER :: nrow_local, j
656 : TYPE(mp_comm_type), INTENT(IN) :: pcol_group
657 :
658 : CHARACTER(LEN=*), PARAMETER :: routineN = 'DGKS_ortho_${nametype1}$'
659 : LOGICAL :: local_comp
660 : INTEGER :: handle
661 :
662 544322 : CALL timeset(routineN, handle)
663 :
664 544322 : IF (local_comp) CALL ${nametype1}$gemv('T', nrow_local, j, ${nametype_one}$, local_history, &
665 460645 : nrow_local, f_vec, 1, ${nametype_zero}$, s_vec, 1)
666 9913372 : CALL pcol_group%sum(s_vec(1:j))
667 544322 : IF (local_comp) CALL ${nametype1}$gemv('N', nrow_local, j, ${nametype_negone}$, reorth_mat, &
668 460645 : nrow_local, s_vec, 1, ${nametype_one}$, f_vec, 1)
669 5228847 : h_vec(1:j) = h_vec(1:j) + s_vec(1:j)
670 :
671 544322 : CALL timestop(handle)
672 :
673 544322 : END SUBROUTINE DGKS_ortho_${nametype1}$
674 :
675 : ! **************************************************************************************************
676 : !> \brief Compute the norm of a vector distributed along proc_col
677 : !> as local arrays. Always return the real part next to the complex rep.
678 : !> \param vec ...
679 : !> \param norm ...
680 : !> \param rnorm ...
681 : !> \param pcol_group ...
682 : ! **************************************************************************************************
683 853296 : SUBROUTINE compute_norms_${nametype1}$ (vec, norm, rnorm, pcol_group)
684 : ${type_nametype1}$, DIMENSION(:) :: vec
685 : REAL(${type_prec}$) :: rnorm
686 : ${type_nametype1}$ :: norm
687 : TYPE(mp_comm_type), INTENT(IN) :: pcol_group
688 :
689 : ! the norm is computed along the processor column
690 9307725 : norm = DOT_PRODUCT(vec, vec)
691 853296 : CALL pcol_group%sum(norm)
692 853296 : rnorm = SQRT(REAL(norm, ${type_prec}$))
693 853296 : ${rnorm_to_norm}$
694 :
695 853296 : END SUBROUTINE compute_norms_${nametype1}$
696 :
697 : ! **************************************************************************************************
698 : !> \brief Computes the initial guess for the solution of the generalized eigenvalue
699 : !> using the arnoldi method
700 : !> \param matrix ...
701 : !> \param matrix_arnoldi ...
702 : !> \param vectors ...
703 : !> \param arnoldi_data ...
704 : ! **************************************************************************************************
705 :
706 3728 : SUBROUTINE gev_arnoldi_init_${nametype1}$ (matrix, matrix_arnoldi, vectors, arnoldi_data)
707 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
708 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix_arnoldi
709 : TYPE(m_x_v_vectors_type) :: vectors
710 : TYPE(arnoldi_data_type) :: arnoldi_data
711 :
712 : INTEGER :: iseed(4), row_size, col_size, &
713 : nrow_local, ncol_local, &
714 : row, col
715 : REAL(${type_prec}$) :: rnorm
716 : TYPE(dbcsr_iterator_type) :: iter
717 : ${type_nametype1}$ :: norm, denom
718 : ${type_nametype1}$, DIMENSION(:), ALLOCATABLE :: &
719 : v_vec, w_vec
720 3728 : ${type_nametype1}$, DIMENSION(:), POINTER :: data_vec
721 : LOGICAL :: local_comp
722 : TYPE(arnoldi_data_${nametype1}$_type), POINTER :: ar_data
723 : TYPE(arnoldi_control_type), POINTER :: control
724 : TYPE(mp_comm_type) :: pcol_group
725 :
726 7456 : control => get_control(arnoldi_data)
727 3728 : pcol_group = control%pcol_group
728 3728 : local_comp = control%local_comp
729 :
730 3728 : ar_data => get_data_${nametype1}$ (arnoldi_data)
731 :
732 : ! create a local data copy to store the vectors and make Gram Schmidt a bit simpler
733 3728 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
734 11156 : ALLOCATE (v_vec(nrow_local))
735 7428 : ALLOCATE (w_vec(nrow_local))
736 132348 : v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
737 1644048 : ar_data%Hessenberg = ${nametype_zero}$
738 :
739 3728 : IF (control%has_initial_vector) THEN
740 : ! after calling the set routine the initial vector is stored in f_vec
741 2051 : CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
742 : ELSE
743 : ! Setup the initial normalized random vector (sufficient if it only happens on proc_col 0)
744 1677 : CALL dbcsr_iterator_start(iter, vectors%input_vec)
745 5095 : DO WHILE (dbcsr_iterator_blocks_left(iter))
746 3418 : CALL dbcsr_iterator_next_block(iter, row, col, data_vec, row_size=row_size, col_size=col_size)
747 3418 : iseed(1) = 2; iseed(2) = MOD(row, 4095); iseed(3) = MOD(col, 4095); iseed(4) = 11
748 5095 : CALL ${nametype1}$larnv(2, iseed, row_size*col_size, data_vec)
749 : END DO
750 1677 : CALL dbcsr_iterator_stop(iter)
751 : END IF
752 :
753 3728 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
754 :
755 : ! compute the vector norm of the reandom vectorm, get it real valued as well (rnorm)
756 3728 : CALL compute_norms_${nametype1}$ (v_vec, norm, rnorm, control%pcol_group)
757 :
758 3728 : IF (rnorm == 0) rnorm = 1 ! catch case where this rank has no actual data
759 3728 : CALL dbcsr_scale(vectors%input_vec, REAL(1.0, ${type_prec}$)/rnorm)
760 :
761 : CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
762 3728 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
763 :
764 3728 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, w_vec, nrow_local, control%local_comp)
765 :
766 : ar_data%rho_scale = ${nametype_zero}$
767 68038 : ar_data%rho_scale = DOT_PRODUCT(v_vec, w_vec)
768 3728 : CALL pcol_group%sum(ar_data%rho_scale)
769 :
770 : CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
771 3728 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
772 :
773 3728 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, w_vec, nrow_local, control%local_comp)
774 :
775 : denom = ${nametype_zero}$
776 68038 : denom = DOT_PRODUCT(v_vec, w_vec)
777 3728 : CALL pcol_group%sum(denom)
778 3728 : IF (control%myproc == 0) ar_data%rho_scale = ar_data%rho_scale/denom
779 3728 : CALL control%mp_group%bcast(ar_data%rho_scale, 0)
780 :
781 : ! if the maximum ev is requested we need to optimize with -A-rho*B
782 3728 : CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
783 3728 : CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, ${nametype_one}$, -ar_data%rho_scale)
784 :
785 68038 : ar_data%x_vec = v_vec
786 :
787 11184 : END SUBROUTINE gev_arnoldi_init_${nametype1}$
788 :
789 : ! **************************************************************************************************
790 : !> \brief builds the basis rothogonal wrt. the metric.
791 : !> The structure looks similar to normal arnoldi but norms, vectors and
792 : !> matrix_vector products are very differently defined. Therefore it is
793 : !> cleaner to put it in a separate subroutine to avoid confusion
794 : !> \param matrix ...
795 : !> \param vectors ...
796 : !> \param arnoldi_data ...
797 : ! **************************************************************************************************
798 4605 : SUBROUTINE gev_build_subspace_${nametype1}$ (matrix, vectors, arnoldi_data)
799 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
800 : TYPE(m_x_v_vectors_type) :: vectors
801 : TYPE(arnoldi_data_type) :: arnoldi_data
802 :
803 : INTEGER :: j, ncol_local, nrow_local
804 : TYPE(arnoldi_control_type), POINTER :: control
805 : TYPE(arnoldi_data_${nametype1}$_type), POINTER :: ar_data
806 : ${type_nametype1}$ :: norm
807 : ${type_nametype1}$, ALLOCATABLE, DIMENSION(:) :: h_vec, s_vec, v_vec, w_vec
808 4605 : ${type_nametype1}$, ALLOCATABLE, DIMENSION(:, :) :: Zmat, CZmat, BZmat
809 : TYPE(mp_comm_type) :: pcol_group
810 :
811 9210 : ar_data => get_data_${nametype1}$ (arnoldi_data)
812 4605 : control => get_control(arnoldi_data)
813 4605 : control%converged = .FALSE.
814 4605 : pcol_group = control%pcol_group
815 :
816 : ! create the vectors required during the iterations
817 4605 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
818 18362 : ALLOCATE (v_vec(nrow_local)); ALLOCATE (w_vec(nrow_local))
819 213313 : v_vec = ${nametype_zero}$; w_vec = ${nametype_zero}$
820 18420 : ALLOCATE (s_vec(control%max_iter)); ALLOCATE (h_vec(control%max_iter))
821 27572 : ALLOCATE (Zmat(nrow_local, control%max_iter)); ALLOCATE (CZmat(nrow_local, control%max_iter))
822 13786 : ALLOCATE (BZmat(nrow_local, control%max_iter))
823 :
824 4605 : CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%x_vec, nrow_local, control%local_comp)
825 : CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
826 4605 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
827 4605 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, BZmat(:, 1), nrow_local, control%local_comp)
828 :
829 : norm = ${nametype_zero}$
830 108959 : norm = DOT_PRODUCT(ar_data%x_vec, BZmat(:, 1))
831 4605 : CALL pcol_group%sum(norm)
832 4605 : IF (control%local_comp) THEN
833 213284 : Zmat(:, 1) = ar_data%x_vec/SQRT(norm); BZmat(:, 1) = BZmat(:, 1)/SQRT(norm)
834 : END IF
835 :
836 76623 : DO j = 1, control%max_iter
837 76623 : control%current_step = j
838 76623 : CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, Zmat(:, j), nrow_local, control%local_comp)
839 : CALL dbcsr_matrix_colvec_multiply(matrix(1)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
840 76623 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
841 76623 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, CZmat(:, j), nrow_local, control%local_comp)
842 2000163 : w_vec(:) = CZmat(:, j)
843 :
844 : ! Let's do the orthonormalization, to get the new f_vec. First try the Gram Schmidt scheme
845 : CALL Gram_Schmidt_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, w_vec, nrow_local, j, &
846 76623 : BZmat, Zmat, control%local_comp, control%pcol_group)
847 :
848 : ! A bit more expensive but simpliy always top up with a DGKS correction, otherwise numerics
849 : ! can becom a problem later on, there is probably a good check, but we don't perform it
850 : CALL DGKS_ortho_${nametype1}$ (h_vec, ar_data%f_vec, s_vec, nrow_local, j, BZmat, &
851 76623 : Zmat, control%local_comp, control%pcol_group)
852 :
853 76623 : CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, ar_data%f_vec, nrow_local, control%local_comp)
854 : CALL dbcsr_matrix_colvec_multiply(matrix(2)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
855 76623 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
856 76623 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, v_vec, nrow_local, control%local_comp)
857 : norm = ${nametype_zero}$
858 2000163 : norm = DOT_PRODUCT(ar_data%f_vec, v_vec)
859 76623 : CALL pcol_group%sum(norm)
860 :
861 76623 : IF (control%myproc == 0) control%converged = REAL(norm, ${type_prec}$) .LT. EPSILON(REAL(1.0, ${type_prec}$))
862 76623 : CALL control%mp_group%bcast(control%converged, 0)
863 76623 : IF (control%converged) EXIT
864 74555 : IF (j == control%max_iter - 1) EXIT
865 :
866 76623 : IF (control%local_comp) THEN
867 3710054 : Zmat(:, j + 1) = ar_data%f_vec/SQRT(norm); BZmat(:, j + 1) = v_vec(:)/SQRT(norm)
868 : END IF
869 : END DO
870 :
871 : ! getting a bit more complicated here as the final matrix is again a product which has to be computed with the
872 : ! distributed vectors, therefore a sum along the first proc_col is necessary. As we want that matrix everywhere,
873 : ! we set it to zero before and compute the distributed product only on the first col and then sum over the full grid
874 2030805 : ar_data%Hessenberg = ${nametype_zero}$
875 4605 : IF (control%local_comp) THEN
876 : ar_data%Hessenberg(1:control%current_step, 1:control%current_step) = &
877 24431267 : MATMUL(TRANSPOSE(CZmat(:, 1:control%current_step)), Zmat(:, 1:control%current_step))
878 : END IF
879 4057005 : CALL control%mp_group%sum(ar_data%Hessenberg)
880 :
881 2183785 : ar_data%local_history = Zmat
882 : ! broadcast the Hessenberg matrix so we don't need to care later on
883 :
884 4605 : DEALLOCATE (v_vec); DEALLOCATE (w_vec); DEALLOCATE (s_vec); DEALLOCATE (h_vec); DEALLOCATE (CZmat);
885 4605 : DEALLOCATE (Zmat); DEALLOCATE (BZmat)
886 :
887 9210 : END SUBROUTINE gev_build_subspace_${nametype1}$
888 :
889 : ! **************************************************************************************************
890 : !> \brief Updates all data after an inner loop of the generalized ev arnoldi.
891 : !> Updates rho and C=A-rho*B accordingly.
892 : !> As an update scheme is used for he ev, the output ev has to be replaced
893 : !> with the updated one.
894 : !> Furthermore a convergence check is performed. The mv product could
895 : !> be skiiped by making clever use of the precomputed data, However,
896 : !> it is most likely not worth the effort.
897 : !> \param matrix ...
898 : !> \param matrix_arnoldi ...
899 : !> \param vectors ...
900 : !> \param arnoldi_data ...
901 : ! **************************************************************************************************
902 4605 : SUBROUTINE gev_update_data_${nametype1}$ (matrix, matrix_arnoldi, vectors, arnoldi_data)
903 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix
904 : TYPE(dbcsr_p_type), DIMENSION(:) :: matrix_arnoldi
905 : TYPE(m_x_v_vectors_type) :: vectors
906 : TYPE(arnoldi_data_type) :: arnoldi_data
907 :
908 : TYPE(arnoldi_control_type), POINTER :: control
909 : TYPE(arnoldi_data_${nametype1}$_type), POINTER :: ar_data
910 : INTEGER :: ncol_local, nrow_local, ind, i
911 4605 : ${type_nametype1}$, ALLOCATABLE, DIMENSION(:) :: v_vec
912 : REAL(${type_prec}$) :: rnorm
913 : ${type_nametype1}$ :: norm
914 : COMPLEX(${type_prec}$) :: val
915 :
916 4605 : control => get_control(arnoldi_data)
917 :
918 4605 : ar_data => get_data_${nametype1}$ (arnoldi_data)
919 :
920 : ! compute the new shift, hack around the problem templating the conversion
921 4605 : val = ar_data%evals(control%selected_ind(1))
922 4605 : ar_data%rho_scale = ar_data%rho_scale + ${val_to_type}$
923 : ! compute the new eigenvector / initial guess for the next arnoldi loop
924 108959 : ar_data%x_vec = ${nametype_zero}$
925 81228 : DO i = 1, control%current_step
926 76623 : val = ar_data%revec(i, control%selected_ind(1))
927 2004768 : ar_data%x_vec(:) = ar_data%x_vec(:) + ar_data%local_history(:, i)*${val_to_type}$
928 : END DO
929 : ! ar_data%x_vec(:)=MATMUL(ar_data%local_history(:,1:control%current_step),&
930 : ! ar_data%revec(1:control%current_step,control%selected_ind(1)))
931 :
932 : ! update the C-matrix (A-rho*B), if the maximum value is requested we have to use -A-rho*B
933 4605 : CALL dbcsr_copy(matrix_arnoldi(1)%matrix, matrix(1)%matrix)
934 4605 : CALL dbcsr_add(matrix_arnoldi(1)%matrix, matrix(2)%matrix, ${nametype_one}$, -ar_data%rho_scale)
935 :
936 : ! compute convergence and set the correct eigenvalue and eigenvector
937 4605 : CALL dbcsr_get_info(matrix=vectors%input_vec, nfullrows_local=nrow_local, nfullcols_local=ncol_local)
938 4605 : IF (ncol_local > 0) THEN
939 13786 : ALLOCATE (v_vec(nrow_local))
940 4605 : CALL compute_norms_${nametype1}$ (ar_data%x_vec, norm, rnorm, control%pcol_group)
941 108959 : v_vec(:) = ar_data%x_vec(:)/rnorm
942 : END IF
943 4605 : CALL transfer_local_array_to_dbcsr_${nametype1}$ (vectors%input_vec, v_vec, nrow_local, control%local_comp)
944 : CALL dbcsr_matrix_colvec_multiply(matrix_arnoldi(1)%matrix, vectors%input_vec, vectors%result_vec, ${nametype_one}$, &
945 4605 : ${nametype_zero}$, vectors%rep_row_vec, vectors%rep_col_vec)
946 4605 : CALL transfer_dbcsr_to_local_array_${nametype1}$ (vectors%result_vec, v_vec, nrow_local, control%local_comp)
947 4605 : IF (ncol_local > 0) THEN
948 4605 : CALL compute_norms_${nametype1}$ (v_vec, norm, rnorm, control%pcol_group)
949 : ! check convergence
950 4605 : control%converged = rnorm .LT. control%threshold
951 4605 : DEALLOCATE (v_vec)
952 : END IF
953 : ! and broadcast the real eigenvalue
954 4605 : CALL control%mp_group%bcast(control%converged, 0)
955 4605 : ind = control%selected_ind(1)
956 4605 : CALL control%mp_group%bcast(ar_data%rho_scale, 0)
957 :
958 : ! Again the maximum value request is done on -A therefore the eigenvalue needs the opposite sign
959 4605 : ar_data%evals(ind) = ar_data%rho_scale
960 :
961 4605 : END SUBROUTINE gev_update_data_${nametype1}$
962 : #:endfor
963 :
964 250 : END MODULE arnoldi_methods
|