Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief module that contains the algorithms to perform an itrative
10 : !> diagonalization by the block-Lanczos approach
11 : !> \par History
12 : !> 05.2009 created [MI]
13 : !> \author fawzi
14 : ! **************************************************************************************************
15 : MODULE qs_scf_lanczos
16 :
17 : USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale,&
18 : cp_fm_qr_factorization,&
19 : cp_fm_scale_and_add,&
20 : cp_fm_transpose,&
21 : cp_fm_triangular_multiply
22 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
23 : USE cp_fm_diag, ONLY: choose_eigv_solver
24 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
25 : cp_fm_struct_release,&
26 : cp_fm_struct_type
27 : USE cp_fm_types, ONLY: cp_fm_create,&
28 : cp_fm_get_submatrix,&
29 : cp_fm_release,&
30 : cp_fm_set_all,&
31 : cp_fm_set_submatrix,&
32 : cp_fm_to_fm,&
33 : cp_fm_type,&
34 : cp_fm_vectorsnorm
35 : USE cp_log_handling, ONLY: cp_to_string
36 : USE kinds, ONLY: dp
37 : USE parallel_gemm_api, ONLY: parallel_gemm
38 : USE qs_mo_types, ONLY: get_mo_set,&
39 : mo_set_type
40 : USE qs_scf_types, ONLY: krylov_space_type
41 : USE scf_control_types, ONLY: scf_control_type
42 : #include "./base/base_uses.f90"
43 :
44 : IMPLICIT NONE
45 : PRIVATE
46 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos'
47 :
48 : PUBLIC :: krylov_space_allocate, lanczos_refinement, lanczos_refinement_2v
49 :
50 : CONTAINS
51 :
52 : ! **************************************************************************************************
53 :
54 : ! **************************************************************************************************
55 : !> \brief allocates matrices and vectros used in the construction of
56 : !> the krylov space and for the lanczos refinement
57 : !> \param krylov_space ...
58 : !> \param scf_control ...
59 : !> \param mos ...
60 : !> \param
61 : !> \par History
62 : !> 05.2009 created [MI]
63 : ! **************************************************************************************************
64 :
65 8 : SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos)
66 :
67 : TYPE(krylov_space_type), INTENT(INOUT) :: krylov_space
68 : TYPE(scf_control_type), POINTER :: scf_control
69 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN) :: mos
70 :
71 : CHARACTER(LEN=*), PARAMETER :: routineN = 'krylov_space_allocate'
72 :
73 : INTEGER :: handle, ik, ispin, max_nmo, nao, nblock, &
74 : ndim, nk, nmo, nspin
75 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
76 : TYPE(cp_fm_type), POINTER :: mo_coeff
77 :
78 8 : CALL timeset(routineN, handle)
79 :
80 8 : IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN
81 4 : NULLIFY (fm_struct_tmp, mo_coeff)
82 :
83 4 : krylov_space%nkrylov = scf_control%diagonalization%nkrylov
84 4 : krylov_space%nblock = scf_control%diagonalization%nblock_krylov
85 :
86 4 : nk = krylov_space%nkrylov
87 4 : nblock = krylov_space%nblock
88 4 : nspin = SIZE(mos, 1)
89 :
90 16 : ALLOCATE (krylov_space%mo_conv(nspin))
91 16 : ALLOCATE (krylov_space%mo_refine(nspin))
92 16 : ALLOCATE (krylov_space%chc_mat(nspin))
93 16 : ALLOCATE (krylov_space%c_vec(nspin))
94 4 : max_nmo = 0
95 8 : DO ispin = 1, nspin
96 4 : CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff, nao=nao, nmo=nmo)
97 4 : CALL cp_fm_create(krylov_space%mo_conv(ispin), mo_coeff%matrix_struct)
98 4 : CALL cp_fm_create(krylov_space%mo_refine(ispin), mo_coeff%matrix_struct)
99 4 : NULLIFY (fm_struct_tmp)
100 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, &
101 : para_env=mo_coeff%matrix_struct%para_env, &
102 4 : context=mo_coeff%matrix_struct%context)
103 4 : CALL cp_fm_create(krylov_space%chc_mat(ispin), fm_struct_tmp, "chc")
104 4 : CALL cp_fm_create(krylov_space%c_vec(ispin), fm_struct_tmp, "vec")
105 4 : CALL cp_fm_struct_release(fm_struct_tmp)
106 12 : max_nmo = MAX(max_nmo, nmo)
107 : END DO
108 :
109 : !the use of max_nmo might not be ok, in this case allocate nspin matrices
110 12 : ALLOCATE (krylov_space%c_eval(max_nmo))
111 :
112 44 : ALLOCATE (krylov_space%v_mat(nk))
113 :
114 4 : NULLIFY (fm_struct_tmp)
115 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, &
116 : para_env=mo_coeff%matrix_struct%para_env, &
117 4 : context=mo_coeff%matrix_struct%context)
118 36 : DO ik = 1, nk
119 : CALL cp_fm_create(krylov_space%v_mat(ik), matrix_struct=fm_struct_tmp, &
120 36 : name="v_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
121 : END DO
122 4 : ALLOCATE (krylov_space%tmp_mat)
123 : CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
124 4 : name="tmp_mat")
125 4 : CALL cp_fm_struct_release(fm_struct_tmp)
126 :
127 : ! NOTE: the following matrices are small and could be defined
128 : ! as standard array rather than istributed fm
129 4 : NULLIFY (fm_struct_tmp)
130 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, &
131 : para_env=mo_coeff%matrix_struct%para_env, &
132 4 : context=mo_coeff%matrix_struct%context)
133 4 : ALLOCATE (krylov_space%block1_mat)
134 : CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
135 4 : name="a_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
136 4 : ALLOCATE (krylov_space%block2_mat)
137 : CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
138 4 : name="b_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
139 4 : ALLOCATE (krylov_space%block3_mat)
140 : CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
141 4 : name="b2_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
142 4 : CALL cp_fm_struct_release(fm_struct_tmp)
143 :
144 4 : ndim = nblock*nk
145 4 : NULLIFY (fm_struct_tmp)
146 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
147 : para_env=mo_coeff%matrix_struct%para_env, &
148 4 : context=mo_coeff%matrix_struct%context)
149 4 : ALLOCATE (krylov_space%block4_mat)
150 : CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
151 4 : name="t_mat")
152 4 : ALLOCATE (krylov_space%block5_mat)
153 : CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
154 4 : name="t_vec")
155 4 : CALL cp_fm_struct_release(fm_struct_tmp)
156 12 : ALLOCATE (krylov_space%t_eval(ndim))
157 :
158 : ELSE
159 : !Nothing should be done
160 : END IF
161 :
162 8 : CALL timestop(handle)
163 :
164 8 : END SUBROUTINE krylov_space_allocate
165 :
166 : ! **************************************************************************************************
167 : !> \brief lanczos refinement by blocks of not-converged MOs
168 : !> \param krylov_space ...
169 : !> \param ks ...
170 : !> \param c0 ...
171 : !> \param c1 ...
172 : !> \param eval ...
173 : !> \param nao ...
174 : !> \param eps_iter ...
175 : !> \param ispin ...
176 : !> \param check_moconv_only ...
177 : !> \param
178 : !> \par History
179 : !> 05.2009 created [MI]
180 : ! **************************************************************************************************
181 :
182 0 : SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, &
183 : eps_iter, ispin, check_moconv_only)
184 :
185 : TYPE(krylov_space_type), POINTER :: krylov_space
186 : TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
187 : REAL(dp), DIMENSION(:), POINTER :: eval
188 : INTEGER, INTENT(IN) :: nao
189 : REAL(dp), INTENT(IN) :: eps_iter
190 : INTEGER, INTENT(IN) :: ispin
191 : LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
192 :
193 : CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement'
194 : REAL(KIND=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
195 : rzero = 0.0_dp
196 :
197 : INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
198 : nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
199 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
200 : LOGICAL :: my_check_moconv_only
201 : REAL(dp) :: max_norm, min_norm, vmax
202 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: q_mat, tblock, tvblock
203 : REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
204 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
205 : TYPE(cp_fm_type) :: c2_tmp, c3_tmp, c_tmp, hc
206 0 : TYPE(cp_fm_type), DIMENSION(:), POINTER :: v_mat
207 : TYPE(cp_fm_type), POINTER :: a_mat, b2_mat, b_mat, chc, evec, t_mat, &
208 : t_vec
209 :
210 0 : CALL timeset(routineN, handle)
211 :
212 0 : NULLIFY (fm_struct_tmp)
213 0 : NULLIFY (chc, evec)
214 : NULLIFY (c_res, t_eval)
215 0 : NULLIFY (t_mat, t_vec)
216 0 : NULLIFY (a_mat, b_mat, b2_mat, v_mat)
217 :
218 0 : nmo = SIZE(eval, 1)
219 0 : my_check_moconv_only = .FALSE.
220 0 : IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
221 :
222 0 : chc => krylov_space%chc_mat(ispin)
223 0 : evec => krylov_space%c_vec(ispin)
224 0 : c_res => krylov_space%c_eval
225 0 : t_eval => krylov_space%t_eval
226 :
227 : NULLIFY (fm_struct_tmp)
228 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
229 : para_env=c0%matrix_struct%para_env, &
230 0 : context=c0%matrix_struct%context)
231 : CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
232 0 : name="c_tmp")
233 : CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
234 0 : name="hc")
235 0 : CALL cp_fm_struct_release(fm_struct_tmp)
236 :
237 : !Compute (C^t)HC
238 0 : CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
239 0 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
240 :
241 : !Diagonalize (C^t)HC
242 0 : CALL timeset(routineN//"diag_chc", hand1)
243 0 : CALL choose_eigv_solver(chc, evec, eval)
244 0 : CALL timestop(hand1)
245 :
246 : !Rotate the C vectors
247 0 : CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
248 :
249 : !Check for converged states
250 0 : CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
251 0 : CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
252 0 : CALL cp_fm_column_scale(c1, eval)
253 0 : CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
254 0 : CALL cp_fm_vectorsnorm(c_tmp, c_res)
255 :
256 0 : nmo_converged = 0
257 0 : nmo_nc = 0
258 0 : max_norm = 0.0_dp
259 0 : min_norm = 1.e10_dp
260 0 : CALL cp_fm_set_all(c1, rzero)
261 0 : DO imo = 1, nmo
262 0 : max_norm = MAX(max_norm, c_res(imo))
263 0 : min_norm = MIN(min_norm, c_res(imo))
264 : END DO
265 0 : DO imo = 1, nmo
266 0 : IF (c_res(imo) <= eps_iter) THEN
267 0 : nmo_converged = nmo_converged + 1
268 : ELSE
269 0 : nmo_nc = nmo - nmo_converged
270 0 : EXIT
271 : END IF
272 : END DO
273 :
274 0 : nblock = krylov_space%nblock
275 0 : num_blocks = nmo_nc/nblock
276 :
277 0 : krylov_space%nmo_nc = nmo_nc
278 0 : krylov_space%nmo_conv = nmo_converged
279 0 : krylov_space%max_res_norm = max_norm
280 0 : krylov_space%min_res_norm = min_norm
281 :
282 0 : IF (my_check_moconv_only) THEN
283 0 : CALL cp_fm_release(c_tmp)
284 0 : CALL cp_fm_release(hc)
285 0 : CALL timestop(handle)
286 0 : RETURN
287 0 : ELSE IF (krylov_space%nmo_nc > 0) THEN
288 :
289 0 : CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
290 :
291 0 : nblock = krylov_space%nblock
292 0 : IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
293 0 : num_blocks = nmo_nc/nblock + 1
294 : ELSE
295 0 : num_blocks = nmo_nc/nblock
296 : END IF
297 :
298 0 : DO ib = 1, num_blocks
299 :
300 0 : imo_low = (ib - 1)*nblock + 1
301 0 : imo_up = MIN(ib*nblock, nmo_nc)
302 0 : nmob = imo_up - imo_low + 1
303 0 : ndim = krylov_space%nkrylov*nmob
304 :
305 0 : NULLIFY (fm_struct_tmp)
306 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
307 : para_env=c0%matrix_struct%para_env, &
308 0 : context=c0%matrix_struct%context)
309 : CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
310 0 : name="c2_tmp")
311 0 : CALL cp_fm_struct_release(fm_struct_tmp)
312 0 : NULLIFY (fm_struct_tmp)
313 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, &
314 : para_env=c0%matrix_struct%para_env, &
315 0 : context=c0%matrix_struct%context)
316 : CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
317 0 : name="c3_tmp")
318 0 : CALL cp_fm_struct_release(fm_struct_tmp)
319 :
320 : ! Create local matrix of right size
321 0 : IF (nmob /= nblock) THEN
322 0 : NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
323 0 : ALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
324 0 : NULLIFY (fm_struct_tmp)
325 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
326 : para_env=chc%matrix_struct%para_env, &
327 0 : context=chc%matrix_struct%context)
328 : CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, &
329 0 : name="a_mat")
330 : CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
331 0 : name="b_mat")
332 : CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
333 0 : name="b2_mat")
334 0 : CALL cp_fm_struct_release(fm_struct_tmp)
335 0 : NULLIFY (fm_struct_tmp)
336 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
337 : para_env=chc%matrix_struct%para_env, &
338 0 : context=chc%matrix_struct%context)
339 : CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, &
340 0 : name="t_mat")
341 : CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, &
342 0 : name="t_vec")
343 0 : CALL cp_fm_struct_release(fm_struct_tmp)
344 0 : NULLIFY (fm_struct_tmp)
345 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
346 : para_env=c0%matrix_struct%para_env, &
347 0 : context=c0%matrix_struct%context)
348 0 : ALLOCATE (v_mat(krylov_space%nkrylov))
349 0 : DO ik = 1, krylov_space%nkrylov
350 : CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
351 0 : name="v_mat")
352 : END DO
353 0 : CALL cp_fm_struct_release(fm_struct_tmp)
354 : ELSE
355 0 : a_mat => krylov_space%block1_mat
356 0 : b_mat => krylov_space%block2_mat
357 0 : b2_mat => krylov_space%block3_mat
358 0 : t_mat => krylov_space%block4_mat
359 0 : t_vec => krylov_space%block5_mat
360 0 : v_mat => krylov_space%v_mat
361 : END IF
362 :
363 0 : ALLOCATE (tblock(nmob, nmob))
364 0 : ALLOCATE (tvblock(nmob, ndim))
365 :
366 0 : CALL timeset(routineN//"_kry_loop", hand2)
367 0 : CALL cp_fm_set_all(b_mat, rzero)
368 0 : CALL cp_fm_set_all(t_mat, rzero)
369 0 : CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
370 :
371 : !Compute A =(V^t)HV
372 0 : CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
373 : CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1), hc, &
374 0 : rzero, a_mat)
375 :
376 : !Compute the residual matrix R for next
377 : !factorisation
378 : CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1), a_mat, &
379 0 : rzero, c_tmp)
380 0 : CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
381 :
382 : ! Build the block tridiagonal matrix
383 0 : CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
384 0 : CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob)
385 :
386 0 : DO ik = 2, krylov_space%nkrylov
387 :
388 : ! Call lapack for QR factorization
389 0 : CALL cp_fm_set_all(b_mat, rzero)
390 0 : CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
391 0 : CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
392 :
393 : CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.TRUE., &
394 0 : n_rows=nao, n_cols=nmob)
395 :
396 : !Compute A =(V^t)HV
397 0 : CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
398 0 : CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik), hc, rzero, a_mat)
399 :
400 : !Compute the !residual matrix R !for next !factorisation
401 : CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik), a_mat, &
402 0 : rzero, c_tmp)
403 0 : CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
404 0 : CALL cp_fm_to_fm(v_mat(ik - 1), hc, nmob, 1, 1)
405 : CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.TRUE., &
406 0 : n_rows=nao, n_cols=nmob, alpha=rmone)
407 0 : CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc)
408 :
409 : ! Build the block tridiagonal matrix
410 0 : it = (ik - 2)*nmob + 1
411 0 : jt = (ik - 1)*nmob + 1
412 :
413 0 : CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
414 0 : CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob)
415 0 : CALL cp_fm_transpose(b_mat, a_mat)
416 0 : CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
417 0 : CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob)
418 :
419 : END DO ! ik
420 0 : CALL timestop(hand2)
421 :
422 0 : DEALLOCATE (tblock)
423 :
424 0 : CALL timeset(routineN//"_diag_tri", hand3)
425 :
426 0 : CALL choose_eigv_solver(t_mat, t_vec, t_eval)
427 : ! Diagonalize the block-tridiagonal matrix
428 0 : CALL timestop(hand3)
429 :
430 0 : CALL timeset(routineN//"_build_cnew", hand4)
431 : ! !Compute the refined vectors
432 0 : CALL cp_fm_set_all(c2_tmp, rzero)
433 0 : DO ik = 1, krylov_space%nkrylov
434 0 : jt = (ik - 1)*nmob
435 : CALL parallel_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik), t_vec, rone, c2_tmp, &
436 0 : b_first_row=(jt + 1))
437 : END DO
438 0 : DEALLOCATE (tvblock)
439 :
440 0 : CALL cp_fm_set_all(c3_tmp, rzero)
441 0 : CALL parallel_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1), c2_tmp, rzero, c3_tmp)
442 :
443 : !Try to avoid linear dependencies
444 0 : ALLOCATE (q_mat(nmob, ndim))
445 : !get max
446 0 : CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim)
447 :
448 0 : ALLOCATE (itaken(ndim))
449 0 : itaken = 0
450 0 : DO it = 1, nmob
451 0 : vmax = 0.0_dp
452 : !select index ik
453 0 : DO jt = 1, ndim
454 0 : IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
455 0 : vmax = ABS(q_mat(it, jt))
456 0 : ik = jt
457 : END IF
458 : END DO
459 0 : itaken(ik) = 1
460 :
461 0 : CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
462 : END DO
463 0 : DEALLOCATE (itaken)
464 0 : DEALLOCATE (q_mat)
465 :
466 : !Copy in the converged set to enlarge the converged subspace
467 0 : CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
468 0 : CALL timestop(hand4)
469 :
470 0 : IF (nmob < nblock) THEN
471 0 : CALL cp_fm_release(a_mat)
472 0 : CALL cp_fm_release(b_mat)
473 0 : CALL cp_fm_release(b2_mat)
474 0 : CALL cp_fm_release(t_mat)
475 0 : CALL cp_fm_release(t_vec)
476 0 : DEALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
477 0 : CALL cp_fm_release(v_mat)
478 : END IF
479 0 : CALL cp_fm_release(c2_tmp)
480 0 : CALL cp_fm_release(c3_tmp)
481 : END DO ! ib
482 :
483 0 : CALL timeset(routineN//"_ortho", hand5)
484 0 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
485 :
486 0 : CALL cp_fm_cholesky_decompose(chc, nmo)
487 0 : CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
488 0 : CALL timestop(hand5)
489 :
490 0 : CALL cp_fm_release(c_tmp)
491 0 : CALL cp_fm_release(hc)
492 : ELSE
493 0 : CALL cp_fm_release(c_tmp)
494 0 : CALL cp_fm_release(hc)
495 0 : CALL timestop(handle)
496 0 : RETURN
497 : END IF
498 :
499 0 : CALL timestop(handle)
500 :
501 0 : END SUBROUTINE lanczos_refinement
502 :
503 : !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
504 :
505 : ! **************************************************************************************************
506 : !> \brief ...
507 : !> \param krylov_space ...
508 : !> \param ks ...
509 : !> \param c0 ...
510 : !> \param c1 ...
511 : !> \param eval ...
512 : !> \param nao ...
513 : !> \param eps_iter ...
514 : !> \param ispin ...
515 : !> \param check_moconv_only ...
516 : ! **************************************************************************************************
517 592 : SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, &
518 : eps_iter, ispin, check_moconv_only)
519 :
520 : TYPE(krylov_space_type), POINTER :: krylov_space
521 : TYPE(cp_fm_type), INTENT(IN) :: ks, c0, c1
522 : REAL(dp), DIMENSION(:), POINTER :: eval
523 : INTEGER, INTENT(IN) :: nao
524 : REAL(dp), INTENT(IN) :: eps_iter
525 : INTEGER, INTENT(IN) :: ispin
526 : LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only
527 :
528 : CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement_2v'
529 : REAL(KIND=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, &
530 : rzero = 0.0_dp
531 :
532 : INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
533 : imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
534 : num_blocks
535 592 : INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken
536 592 : INTEGER, DIMENSION(:), POINTER :: iwork
537 : LOGICAL :: my_check_moconv_only
538 : REAL(dp) :: max_norm, min_norm, vmax
539 592 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: a_block, b_block, q_mat, t_mat
540 592 : REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval
541 592 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
542 592 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a_loc, b_loc
543 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
544 : TYPE(cp_fm_type) :: b_mat, c2_tmp, c_tmp, hc, v_tmp
545 592 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: v_mat
546 : TYPE(cp_fm_type), POINTER :: chc, evec
547 :
548 592 : CALL timeset(routineN, handle)
549 :
550 592 : NULLIFY (fm_struct_tmp)
551 592 : NULLIFY (chc, evec)
552 592 : NULLIFY (c_res, t_eval)
553 592 : NULLIFY (b_loc, a_loc)
554 :
555 592 : nmo = SIZE(eval, 1)
556 592 : my_check_moconv_only = .FALSE.
557 592 : IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
558 :
559 592 : chc => krylov_space%chc_mat(ispin)
560 592 : evec => krylov_space%c_vec(ispin)
561 592 : c_res => krylov_space%c_eval
562 592 : t_eval => krylov_space%t_eval
563 :
564 : NULLIFY (fm_struct_tmp)
565 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
566 : para_env=c0%matrix_struct%para_env, &
567 592 : context=c0%matrix_struct%context)
568 : CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
569 592 : name="c_tmp")
570 : CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
571 592 : name="hc")
572 592 : CALL cp_fm_struct_release(fm_struct_tmp)
573 :
574 : !Compute (C^t)HC
575 592 : CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
576 592 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
577 :
578 : !Diagonalize (C^t)HC
579 592 : CALL timeset(routineN//"diag_chc", hand1)
580 592 : CALL choose_eigv_solver(chc, evec, eval)
581 :
582 592 : CALL timestop(hand1)
583 :
584 592 : CALL timeset(routineN//"check_conv", hand6)
585 : !Rotate the C vectors
586 592 : CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
587 :
588 : !Check for converged states
589 592 : CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
590 592 : CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
591 592 : CALL cp_fm_column_scale(c1, eval)
592 592 : CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
593 592 : CALL cp_fm_vectorsnorm(c_tmp, c_res)
594 :
595 592 : nmo_converged = 0
596 592 : nmo_nc = 0
597 592 : max_norm = 0.0_dp
598 592 : min_norm = 1.e10_dp
599 592 : CALL cp_fm_set_all(c1, rzero)
600 21904 : DO imo = 1, nmo
601 21312 : max_norm = MAX(max_norm, c_res(imo))
602 21904 : min_norm = MIN(min_norm, c_res(imo))
603 : END DO
604 19144 : DO imo = 1, nmo
605 19144 : IF (c_res(imo) <= eps_iter) THEN
606 18552 : nmo_converged = nmo_converged + 1
607 : ELSE
608 582 : nmo_nc = nmo - nmo_converged
609 582 : EXIT
610 : END IF
611 : END DO
612 592 : CALL timestop(hand6)
613 :
614 592 : CALL cp_fm_release(c_tmp)
615 592 : CALL cp_fm_release(hc)
616 :
617 592 : krylov_space%nmo_nc = nmo_nc
618 592 : krylov_space%nmo_conv = nmo_converged
619 592 : krylov_space%max_res_norm = max_norm
620 592 : krylov_space%min_res_norm = min_norm
621 :
622 592 : IF (my_check_moconv_only) THEN
623 : ! Do nothing
624 592 : ELSE IF (krylov_space%nmo_nc > 0) THEN
625 :
626 582 : CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
627 :
628 582 : nblock = krylov_space%nblock
629 582 : IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
630 0 : num_blocks = nmo_nc/nblock + 1
631 : ELSE
632 582 : num_blocks = nmo_nc/nblock
633 : END IF
634 :
635 3342 : DO ib = 1, num_blocks
636 :
637 2760 : imo_low = (ib - 1)*nblock + 1
638 2760 : imo_up = MIN(ib*nblock, nmo_nc)
639 2760 : nmob = imo_up - imo_low + 1
640 2760 : ndim = krylov_space%nkrylov*nmob
641 :
642 : ! Allocation
643 2760 : CALL timeset(routineN//"alloc", hand6)
644 11040 : ALLOCATE (a_block(nmob, nmob))
645 8280 : ALLOCATE (b_block(nmob, nmob))
646 11040 : ALLOCATE (t_mat(ndim, ndim))
647 :
648 2760 : NULLIFY (fm_struct_tmp)
649 : ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes
650 : ! this is due to the use of two-dimensional grids of processes
651 : ! nrow_global is distributed over num_pe(1)
652 : ! a local_data array is anyway allocated for the processes non included
653 : ! this should have a minimum size
654 : ! with ncol_local=1.
655 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
656 : ncol_block=nmob, &
657 : para_env=c0%matrix_struct%para_env, &
658 : context=c0%matrix_struct%context, &
659 2760 : force_block=.TRUE.)
660 : CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
661 2760 : name="c_tmp")
662 2760 : CALL cp_fm_set_all(c_tmp, rzero)
663 : CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, &
664 2760 : name="v_tmp")
665 2760 : CALL cp_fm_set_all(v_tmp, rzero)
666 2760 : CALL cp_fm_struct_release(fm_struct_tmp)
667 2760 : NULLIFY (fm_struct_tmp)
668 30360 : ALLOCATE (v_mat(krylov_space%nkrylov))
669 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
670 : ncol_block=nmob, &
671 : para_env=c0%matrix_struct%para_env, &
672 : context=c0%matrix_struct%context, &
673 2760 : force_block=.TRUE.)
674 24840 : DO ik = 1, krylov_space%nkrylov
675 : CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
676 24840 : name="v_mat")
677 : END DO
678 : CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
679 2760 : name="hc")
680 2760 : CALL cp_fm_struct_release(fm_struct_tmp)
681 2760 : NULLIFY (fm_struct_tmp)
682 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
683 : ncol_block=ndim, &
684 : para_env=c0%matrix_struct%para_env, &
685 : context=c0%matrix_struct%context, &
686 2760 : force_block=.TRUE.)
687 : CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
688 2760 : name="c2_tmp")
689 2760 : CALL cp_fm_struct_release(fm_struct_tmp)
690 :
691 2760 : NULLIFY (fm_struct_tmp)
692 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
693 : para_env=c0%matrix_struct%para_env, &
694 2760 : context=c0%matrix_struct%context)
695 : CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
696 2760 : name="b_mat")
697 2760 : CALL cp_fm_struct_release(fm_struct_tmp)
698 2760 : CALL timestop(hand6)
699 : !End allocation
700 :
701 2760 : CALL cp_fm_set_all(b_mat, rzero)
702 2760 : CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
703 :
704 : ! Here starts the construction of krylov space
705 2760 : CALL timeset(routineN//"_kry_loop", hand2)
706 : !Compute A =(V^t)HV
707 2760 : CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
708 :
709 8280 : a_block = 0.0_dp
710 2760 : a_loc => v_mat(1)%local_data
711 2760 : b_loc => hc%local_data
712 :
713 2760 : IF (SIZE(hc%local_data, 2) == nmob) THEN
714 : ! this is a work around to avoid problems due to the two dimensional grid of processes
715 : CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
716 2760 : SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
717 : END IF
718 2760 : CALL hc%matrix_struct%para_env%sum(a_block)
719 :
720 : !Compute the residual matrix R for next
721 : !factorisation
722 149040 : c_tmp%local_data = 0.0_dp
723 2760 : IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
724 2760 : b_loc => c_tmp%local_data
725 : CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
726 : SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
727 2760 : b_loc(1, 1), SIZE(c_tmp%local_data, 1))
728 : END IF
729 2760 : CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
730 :
731 : ! Build the block tridiagonal matrix
732 201480 : t_mat = 0.0_dp
733 5520 : DO i = 1, nmob
734 8280 : t_mat(1:nmob, i) = a_block(1:nmob, i)
735 : END DO
736 :
737 22080 : DO ik = 2, krylov_space%nkrylov
738 : ! Call lapack for QR factorization
739 19320 : CALL cp_fm_set_all(b_mat, rzero)
740 19320 : CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
741 19320 : CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
742 :
743 : CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.TRUE., &
744 19320 : n_rows=nao, n_cols=nmob)
745 :
746 19320 : CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob)
747 :
748 : !Compute A =(V^t)HV
749 19320 : CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
750 :
751 57960 : a_block = 0.0_dp
752 19320 : IF (SIZE(hc%local_data, 2) == nmob) THEN
753 19320 : a_loc => v_mat(ik)%local_data
754 19320 : b_loc => hc%local_data
755 : CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
756 19320 : SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
757 : END IF
758 19320 : CALL hc%matrix_struct%para_env%sum(a_block)
759 :
760 : !Compute the residual matrix R for next
761 : !factorisation
762 1043280 : c_tmp%local_data = 0.0_dp
763 19320 : IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
764 19320 : a_loc => v_mat(ik)%local_data
765 19320 : b_loc => c_tmp%local_data
766 : CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
767 : SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
768 19320 : b_loc(1, 1), SIZE(c_tmp%local_data, 1))
769 : END IF
770 19320 : CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
771 :
772 19320 : IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
773 19320 : a_loc => v_mat(ik - 1)%local_data
774 38640 : DO j = 1, nmob
775 57960 : DO i = 1, j
776 1043280 : DO ia = 1, SIZE(c_tmp%local_data, 1)
777 1023960 : b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
778 : END DO
779 : END DO
780 : END DO
781 : END IF
782 :
783 : ! Build the block tridiagonal matrix
784 19320 : it = (ik - 2)*nmob
785 19320 : jt = (ik - 1)*nmob
786 41400 : DO j = 1, nmob
787 38640 : t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
788 57960 : DO i = 1, nmob
789 19320 : t_mat(it + i, jt + j) = b_block(j, i)
790 38640 : t_mat(jt + j, it + i) = b_block(j, i)
791 : END DO
792 : END DO
793 : END DO ! ik
794 2760 : CALL timestop(hand2)
795 :
796 2760 : CALL timeset(routineN//"_diag_tri", hand3)
797 2760 : lwork = 1 + 6*ndim + 2*ndim**2 + 5000
798 2760 : liwork = 5*ndim + 3
799 8280 : ALLOCATE (work(lwork))
800 8280 : ALLOCATE (iwork(liwork))
801 :
802 : ! Diagonalize the block-tridiagonal matrix
803 : CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
804 2760 : work(1), lwork, iwork(1), liwork, info)
805 2760 : DEALLOCATE (work)
806 2760 : DEALLOCATE (iwork)
807 2760 : CALL timestop(hand3)
808 :
809 2760 : CALL timeset(routineN//"_build_cnew", hand4)
810 : ! !Compute the refined vectors
811 :
812 1173000 : c2_tmp%local_data = 0.0_dp
813 11040 : ALLOCATE (q_mat(nmob, ndim))
814 46920 : q_mat = 0.0_dp
815 2760 : b_loc => c2_tmp%local_data
816 24840 : DO ik = 1, krylov_space%nkrylov
817 22080 : CALL cp_fm_to_fm(v_mat(ik), v_tmp, nmob, 1, 1)
818 24840 : IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
819 : ! a_loc => v_mat(ik)%local_data
820 22080 : a_loc => v_tmp%local_data
821 22080 : it = (ik - 1)*nmob
822 : CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
823 : SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
824 22080 : b_loc(1, 1), SIZE(c2_tmp%local_data, 1))
825 : END IF
826 : END DO !ik
827 :
828 : !Try to avoid linear dependencies
829 2760 : CALL cp_fm_to_fm(v_mat(1), v_tmp, nmob, 1, 1)
830 2760 : IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
831 : ! a_loc => v_mat(1)%local_data
832 2760 : a_loc => v_tmp%local_data
833 2760 : b_loc => c2_tmp%local_data
834 : CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
835 : SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), &
836 2760 : 0.0_dp, q_mat(1, 1), nmob)
837 : END IF
838 2760 : CALL hc%matrix_struct%para_env%sum(q_mat)
839 :
840 8280 : ALLOCATE (itaken(ndim))
841 24840 : itaken = 0
842 5520 : DO it = 1, nmob
843 : vmax = 0.0_dp
844 : !select index ik
845 24840 : DO jt = 1, ndim
846 24840 : IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
847 3020 : vmax = ABS(q_mat(it, jt))
848 3020 : ik = jt
849 : END IF
850 : END DO
851 2760 : itaken(ik) = 1
852 :
853 5520 : CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
854 : END DO
855 2760 : DEALLOCATE (itaken)
856 2760 : DEALLOCATE (q_mat)
857 :
858 : !Copy in the converged set to enlarge the converged subspace
859 2760 : CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
860 2760 : CALL timestop(hand4)
861 :
862 2760 : CALL cp_fm_release(c2_tmp)
863 2760 : CALL cp_fm_release(c_tmp)
864 2760 : CALL cp_fm_release(hc)
865 2760 : CALL cp_fm_release(v_tmp)
866 2760 : CALL cp_fm_release(b_mat)
867 :
868 2760 : DEALLOCATE (t_mat)
869 2760 : DEALLOCATE (a_block)
870 2760 : DEALLOCATE (b_block)
871 :
872 28182 : CALL cp_fm_release(v_mat)
873 :
874 : END DO ! ib
875 :
876 582 : CALL timeset(routineN//"_ortho", hand5)
877 582 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
878 :
879 582 : CALL cp_fm_cholesky_decompose(chc, nmo)
880 582 : CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
881 582 : CALL timestop(hand5)
882 : ELSE
883 : ! Do nothing
884 : END IF
885 :
886 592 : CALL timestop(handle)
887 1184 : END SUBROUTINE lanczos_refinement_2v
888 :
889 : END MODULE qs_scf_lanczos
|