Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Localization methods such as 2x2 Jacobi rotations
10 : !> Steepest Decents
11 : !> Conjugate Gradient
12 : !> \par History
13 : !> Initial parallellization of jacobi (JVDV 07.2003)
14 : !> direct minimization using exponential parametrization (JVDV 09.2003)
15 : !> crazy rotations go fast (JVDV 10.2003)
16 : !> \author CJM (04.2003)
17 : ! **************************************************************************************************
18 : MODULE qs_localization_methods
19 : USE cell_types, ONLY: cell_type
20 : USE cp_blacs_env, ONLY: cp_blacs_env_type
21 : USE cp_cfm_basic_linalg, ONLY: cp_cfm_column_scale,&
22 : cp_cfm_rot_cols,&
23 : cp_cfm_rot_rows,&
24 : cp_cfm_scale,&
25 : cp_cfm_scale_and_add,&
26 : cp_cfm_schur_product,&
27 : cp_cfm_trace
28 : USE cp_cfm_diag, ONLY: cp_cfm_heevd
29 : USE cp_cfm_types, ONLY: &
30 : cp_cfm_create, cp_cfm_get_element, cp_cfm_get_info, cp_cfm_get_submatrix, cp_cfm_release, &
31 : cp_cfm_set_all, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, cp_cfm_type, &
32 : cp_fm_to_cfm
33 : USE cp_dbcsr_api, ONLY: dbcsr_p_type
34 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply
35 : USE cp_external_control, ONLY: external_control
36 : USE cp_fm_basic_linalg, ONLY: cp_fm_frobenius_norm,&
37 : cp_fm_pdgeqpf,&
38 : cp_fm_pdorgqr,&
39 : cp_fm_scale,&
40 : cp_fm_scale_and_add,&
41 : cp_fm_trace,&
42 : cp_fm_transpose,&
43 : cp_fm_triangular_multiply
44 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
45 : USE cp_fm_diag, ONLY: cp_fm_syevd
46 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
47 : cp_fm_struct_get,&
48 : cp_fm_struct_release,&
49 : cp_fm_struct_type
50 : USE cp_fm_types, ONLY: &
51 : cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_init_random, &
52 : cp_fm_maxabsrownorm, cp_fm_maxabsval, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
53 : cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
54 : USE cp_log_handling, ONLY: cp_logger_get_default_io_unit,&
55 : cp_logger_get_default_unit_nr
56 : USE kahan_sum, ONLY: accurate_sum
57 : USE kinds, ONLY: dp
58 : USE machine, ONLY: m_flush,&
59 : m_walltime
60 : USE mathconstants, ONLY: pi,&
61 : twopi
62 : USE matrix_exp, ONLY: exp_pade_real,&
63 : get_nsquare_norder
64 : USE message_passing, ONLY: mp_para_env_type
65 : USE parallel_gemm_api, ONLY: parallel_gemm
66 : #include "./base/base_uses.f90"
67 :
68 : IMPLICIT NONE
69 : PUBLIC :: initialize_weights, crazy_rotations, &
70 : direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix, &
71 : jacobi_cg_edf_ls
72 :
73 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
74 :
75 : PRIVATE
76 :
77 : TYPE set_c_1d_type
78 : COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => NULL()
79 : END TYPE
80 :
81 : TYPE set_c_2d_type
82 : COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => NULL()
83 : END TYPE
84 :
85 : CONTAINS
86 : ! **************************************************************************************************
87 : !> \brief ...
88 : !> \param C ...
89 : !> \param iterations ...
90 : !> \param eps ...
91 : !> \param converged ...
92 : !> \param sweeps ...
93 : ! **************************************************************************************************
94 210 : SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
95 : TYPE(cp_fm_type), INTENT(IN) :: C
96 : INTEGER, INTENT(IN) :: iterations
97 : REAL(KIND=dp), INTENT(IN) :: eps
98 : LOGICAL, INTENT(INOUT) :: converged
99 : INTEGER, INTENT(INOUT) :: sweeps
100 :
101 : CHARACTER(len=*), PARAMETER :: routineN = 'approx_l1_norm_sd'
102 : INTEGER, PARAMETER :: taylor_order = 100
103 : REAL(KIND=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
104 :
105 : INTEGER :: handle, i, istep, k, n, ncol_local, &
106 : nrow_local, output_unit, p
107 : REAL(KIND=dp) :: expfactor, f2, f2old, gnorm, tnorm
108 : TYPE(cp_blacs_env_type), POINTER :: context
109 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
110 : TYPE(cp_fm_type) :: CTmp, G, Gp1, Gp2, U
111 : TYPE(mp_para_env_type), POINTER :: para_env
112 :
113 30 : CALL timeset(routineN, handle)
114 :
115 30 : NULLIFY (context, para_env, fm_struct_k_k)
116 :
117 30 : output_unit = cp_logger_get_default_io_unit()
118 :
119 : CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
120 : nrow_local=nrow_local, ncol_local=ncol_local, &
121 30 : para_env=para_env, context=context)
122 : CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
123 30 : nrow_global=k, ncol_global=k)
124 30 : CALL cp_fm_create(CTmp, C%matrix_struct)
125 30 : CALL cp_fm_create(U, fm_struct_k_k)
126 30 : CALL cp_fm_create(G, fm_struct_k_k)
127 30 : CALL cp_fm_create(Gp1, fm_struct_k_k)
128 30 : CALL cp_fm_create(Gp2, fm_struct_k_k)
129 : !
130 : ! printing
131 30 : IF (output_unit > 0) THEN
132 15 : WRITE (output_unit, '(1X)')
133 15 : WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
134 15 : WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
135 15 : WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
136 15 : WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
137 15 : WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
138 15 : WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
139 15 : WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
140 : END IF
141 : !
142 30 : f2old = 0.0_dp
143 30 : converged = .FALSE.
144 : !
145 : ! Start the steepest descent
146 1496 : DO istep = 1, iterations
147 : !
148 : !-------------------------------------------------------------------
149 : ! compute f_2
150 : ! f_2(x)=(x^2+eps)^1/2
151 1486 : f2 = 0.0_dp
152 23004 : DO p = 1, ncol_local ! p
153 1113692 : DO i = 1, nrow_local ! i
154 1112206 : f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
155 : END DO
156 : END DO
157 1486 : CALL C%matrix_struct%para_env%sum(f2)
158 : !write(*,*) 'qs_localize: f_2=',f2
159 : !-------------------------------------------------------------------
160 : ! compute the derivative of f_2
161 : ! f_2(x)=(x^2+eps)^1/2
162 23004 : DO p = 1, ncol_local ! p
163 1113692 : DO i = 1, nrow_local ! i
164 1112206 : CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
165 : END DO
166 : END DO
167 1486 : CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
168 : ! antisymmetrize
169 1486 : CALL cp_fm_transpose(G, U)
170 1486 : CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
171 : !
172 : !-------------------------------------------------------------------
173 : !
174 1486 : gnorm = cp_fm_frobenius_norm(G)
175 : !write(*,*) 'qs_localize: norm(G)=',gnorm
176 : !
177 : ! rescale for steepest descent
178 1486 : CALL cp_fm_scale(-alpha, G)
179 : !
180 : ! compute unitary transform
181 : ! zeroth order
182 1486 : CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
183 : ! first order
184 1486 : expfactor = 1.0_dp
185 1486 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
186 1486 : tnorm = cp_fm_frobenius_norm(G)
187 : !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
188 1486 : IF (tnorm .GT. 1.0E-10_dp) THEN
189 : ! other orders
190 1486 : CALL cp_fm_to_fm(G, Gp1)
191 4938 : DO i = 2, taylor_order
192 : ! new power of G
193 4938 : CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
194 4938 : CALL cp_fm_to_fm(Gp2, Gp1)
195 : ! add to the taylor expansion so far
196 4938 : expfactor = expfactor/REAL(i, KIND=dp)
197 4938 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
198 4938 : tnorm = cp_fm_frobenius_norm(Gp1)
199 : !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
200 4938 : IF (tnorm*expfactor .LT. 1.0E-10_dp) EXIT
201 : END DO
202 : END IF
203 : !
204 : ! incrementaly rotate the MOs
205 1486 : CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
206 1486 : CALL cp_fm_to_fm(CTmp, C)
207 : !
208 : ! printing
209 1486 : IF (output_unit .GT. 0) THEN
210 743 : WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
211 : END IF
212 : !
213 : ! Are we done?
214 1486 : sweeps = istep
215 : !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
216 1486 : IF (ABS((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
217 20 : converged = .TRUE.
218 20 : EXIT
219 : END IF
220 1476 : f2old = f2
221 : END DO
222 : !
223 : ! here we should do one refine step to enforce C'*S*C=1 for any case
224 : !
225 : ! Print the final result
226 30 : IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
227 : !
228 : ! sparsity
229 : !DO i=1,size(thresh,1)
230 : ! gnorm = 0.0_dp
231 : ! DO o=1,ncol_local
232 : ! DO p=1,nrow_local
233 : ! IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
234 : ! gnorm = gnorm + 1.0_dp
235 : ! ENDIF
236 : ! ENDDO
237 : ! ENDDO
238 : ! CALL C%matrix_struct%para_env%sum(gnorm)
239 : ! IF(output_unit.GT.0) THEN
240 : ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
241 : ! ENDIF
242 : !ENDDO
243 : !
244 : ! deallocate
245 30 : CALL cp_fm_struct_release(fm_struct_k_k)
246 30 : CALL cp_fm_release(CTmp)
247 30 : CALL cp_fm_release(U)
248 30 : CALL cp_fm_release(G)
249 30 : CALL cp_fm_release(Gp1)
250 30 : CALL cp_fm_release(Gp2)
251 :
252 30 : CALL timestop(handle)
253 :
254 30 : END SUBROUTINE approx_l1_norm_sd
255 : ! **************************************************************************************************
256 : !> \brief ...
257 : !> \param cell ...
258 : !> \param weights ...
259 : ! **************************************************************************************************
260 458 : SUBROUTINE initialize_weights(cell, weights)
261 :
262 : TYPE(cell_type), POINTER :: cell
263 : REAL(KIND=dp), DIMENSION(:) :: weights
264 :
265 : REAL(KIND=dp), DIMENSION(3, 3) :: metric
266 :
267 458 : CPASSERT(ASSOCIATED(cell))
268 :
269 458 : metric = 0.0_dp
270 458 : CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
271 :
272 458 : weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
273 458 : weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
274 458 : weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
275 458 : weights(4) = METRIC(1, 2)
276 458 : weights(5) = METRIC(1, 3)
277 458 : weights(6) = METRIC(2, 3)
278 :
279 458 : END SUBROUTINE initialize_weights
280 :
281 : ! **************************************************************************************************
282 : !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
283 : !> can deal with serial para_envs.
284 : !> \param weights ...
285 : !> \param zij ...
286 : !> \param vectors ...
287 : !> \param para_env ...
288 : !> \param max_iter ...
289 : !> \param eps_localization ...
290 : !> \param sweeps ...
291 : !> \param out_each ...
292 : !> \param target_time ...
293 : !> \param start_time ...
294 : !> \param restricted ...
295 : !> \par History
296 : !> \author Joost VandeVondele (02.2010)
297 : ! **************************************************************************************************
298 388 : SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
299 : eps_localization, sweeps, out_each, target_time, start_time, restricted)
300 :
301 : REAL(KIND=dp), INTENT(IN) :: weights(:)
302 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
303 : TYPE(mp_para_env_type), POINTER :: para_env
304 : INTEGER, INTENT(IN) :: max_iter
305 : REAL(KIND=dp), INTENT(IN) :: eps_localization
306 : INTEGER :: sweeps
307 : INTEGER, INTENT(IN) :: out_each
308 : REAL(dp) :: target_time, start_time
309 : INTEGER :: restricted
310 :
311 388 : IF (para_env%num_pe == 1) THEN
312 : CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
313 0 : sweeps, out_each, restricted=restricted)
314 : ELSE
315 : CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
316 388 : sweeps, out_each, target_time, start_time, restricted=restricted)
317 : END IF
318 :
319 388 : END SUBROUTINE jacobi_rotations
320 :
321 : ! **************************************************************************************************
322 : !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
323 : !> while the routine below works in parallel, it is too slow to be useful
324 : !> \param weights ...
325 : !> \param zij ...
326 : !> \param vectors ...
327 : !> \param max_iter ...
328 : !> \param eps_localization ...
329 : !> \param sweeps ...
330 : !> \param out_each ...
331 : !> \param restricted ...
332 : ! **************************************************************************************************
333 0 : SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
334 : out_each, restricted)
335 : REAL(KIND=dp), INTENT(IN) :: weights(:)
336 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
337 : INTEGER, INTENT(IN) :: max_iter
338 : REAL(KIND=dp), INTENT(IN) :: eps_localization
339 : INTEGER :: sweeps
340 : INTEGER, INTENT(IN) :: out_each
341 : INTEGER :: restricted
342 :
343 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial'
344 :
345 0 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
346 : INTEGER :: dim2, handle, idim, istate, jstate, &
347 : nstate, unit_nr
348 : REAL(KIND=dp) :: ct, st, t1, t2, theta, tolerance
349 : TYPE(cp_cfm_type) :: c_rmat
350 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
351 : TYPE(cp_fm_type) :: rmat
352 :
353 0 : CALL timeset(routineN, handle)
354 :
355 0 : dim2 = SIZE(zij, 2)
356 0 : ALLOCATE (c_zij(dim2))
357 : NULLIFY (mii, mij, mjj)
358 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
359 :
360 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
361 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
362 :
363 0 : CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
364 0 : CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
365 0 : DO idim = 1, dim2
366 0 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
367 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
368 0 : zij(2, idim)%local_data, dp)
369 : END DO
370 :
371 0 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
372 0 : tolerance = 1.0e10_dp
373 :
374 0 : sweeps = 0
375 0 : unit_nr = -1
376 0 : IF (rmat%matrix_struct%para_env%is_source()) THEN
377 0 : unit_nr = cp_logger_get_default_unit_nr()
378 0 : WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
379 : END IF
380 :
381 0 : IF (restricted > 0) THEN
382 0 : unit_nr = cp_logger_get_default_unit_nr()
383 0 : WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
384 0 : nstate = nstate - restricted
385 : END IF
386 :
387 : ! do jacobi sweeps until converged
388 0 : DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
389 0 : sweeps = sweeps + 1
390 0 : t1 = m_walltime()
391 :
392 0 : DO istate = 1, nstate
393 0 : DO jstate = istate + 1, nstate
394 0 : DO idim = 1, dim2
395 0 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
396 0 : CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
397 0 : CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
398 : END DO
399 0 : CALL get_angle(mii, mjj, mij, weights, theta)
400 0 : st = SIN(theta)
401 0 : ct = COS(theta)
402 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij)
403 :
404 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
405 : END DO
406 : END DO
407 :
408 0 : CALL check_tolerance(c_zij, weights, tolerance)
409 :
410 0 : t2 = m_walltime()
411 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
412 : WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
413 0 : "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
414 0 : CALL m_flush(unit_nr)
415 : END IF
416 :
417 : END DO
418 :
419 0 : DO idim = 1, dim2
420 0 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
421 0 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
422 0 : CALL cp_cfm_release(c_zij(idim))
423 : END DO
424 0 : DEALLOCATE (c_zij)
425 0 : DEALLOCATE (mii, mij, mjj)
426 0 : rmat%local_data = REAL(c_rmat%local_data, dp)
427 0 : CALL cp_cfm_release(c_rmat)
428 0 : CALL rotate_orbitals(rmat, vectors)
429 0 : CALL cp_fm_release(rmat)
430 :
431 0 : CALL timestop(handle)
432 :
433 0 : END SUBROUTINE jacobi_rotations_serial
434 : ! **************************************************************************************************
435 : !> \brief very similar to jacobi_rotations_serial with some extra output options
436 : !> \param weights ...
437 : !> \param c_zij ...
438 : !> \param max_iter ...
439 : !> \param c_rmat ...
440 : !> \param eps_localization ...
441 : !> \param tol_out ...
442 : !> \param jsweeps ...
443 : !> \param out_each ...
444 : !> \param c_zij_out ...
445 : !> \param grad_final ...
446 : ! **************************************************************************************************
447 0 : SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
448 0 : tol_out, jsweeps, out_each, c_zij_out, grad_final)
449 : REAL(KIND=dp), INTENT(IN) :: weights(:)
450 : TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
451 : INTEGER, INTENT(IN) :: max_iter
452 : TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
453 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
454 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: tol_out
455 : INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
456 : INTEGER, INTENT(IN), OPTIONAL :: out_each
457 : TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
458 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
459 :
460 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial_1'
461 :
462 : COMPLEX(KIND=dp) :: mzii
463 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
464 : INTEGER :: dim2, handle, idim, istate, jstate, &
465 : nstate, sweeps, unit_nr
466 : REAL(KIND=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
467 : sum_spread_ii, t1, t2, theta, tolerance
468 : TYPE(cp_cfm_type) :: c_rmat_local
469 : TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
470 :
471 0 : CALL timeset(routineN, handle)
472 :
473 0 : dim2 = SIZE(c_zij)
474 : NULLIFY (mii, mij, mjj)
475 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
476 :
477 0 : ALLOCATE (c_zij_local(dim2))
478 0 : CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
479 0 : CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
480 0 : DO idim = 1, dim2
481 0 : CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
482 0 : c_zij_local(idim)%local_data = c_zij(idim)%local_data
483 : END DO
484 :
485 0 : CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
486 0 : tolerance = 1.0e10_dp
487 :
488 0 : IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
489 :
490 0 : sweeps = 0
491 0 : IF (PRESENT(out_each)) THEN
492 0 : unit_nr = -1
493 0 : IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
494 0 : unit_nr = cp_logger_get_default_unit_nr()
495 : END IF
496 0 : alpha = 0.0_dp
497 0 : DO idim = 1, dim2
498 0 : alpha = alpha + weights(idim)
499 : END DO
500 : END IF
501 :
502 : ! do jacobi sweeps until converged
503 0 : DO WHILE (sweeps < max_iter)
504 0 : sweeps = sweeps + 1
505 0 : IF (PRESENT(eps_localization)) THEN
506 0 : IF (tolerance < eps_localization) EXIT
507 : END IF
508 0 : IF (PRESENT(out_each)) t1 = m_walltime()
509 :
510 0 : DO istate = 1, nstate
511 0 : DO jstate = istate + 1, nstate
512 0 : DO idim = 1, dim2
513 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
514 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
515 0 : CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
516 : END DO
517 0 : CALL get_angle(mii, mjj, mij, weights, theta)
518 0 : st = SIN(theta)
519 0 : ct = COS(theta)
520 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
521 :
522 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
523 : END DO
524 : END DO
525 :
526 0 : IF (PRESENT(grad_final)) THEN
527 0 : CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
528 : ELSE
529 0 : CALL check_tolerance(c_zij_local, weights, tolerance)
530 : END IF
531 0 : IF (PRESENT(tol_out)) tol_out = tolerance
532 :
533 0 : IF (PRESENT(out_each)) THEN
534 0 : t2 = m_walltime()
535 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
536 0 : sum_spread_ii = 0.0_dp
537 0 : DO istate = 1, nstate
538 : spread_ii = 0.0_dp
539 0 : DO idim = 1, dim2
540 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
541 : spread_ii = spread_ii + weights(idim)* &
542 0 : ABS(mzii)**2/twopi/twopi
543 : END DO
544 0 : sum_spread_ii = sum_spread_ii + spread_ii
545 : END DO
546 0 : sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
547 0 : avg_spread_ii = sum_spread_ii/nstate
548 : WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
549 0 : "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
550 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
551 0 : sweeps, avg_spread_ii, tolerance, t2 - t1
552 0 : CALL m_flush(unit_nr)
553 : END IF
554 0 : IF (PRESENT(jsweeps)) jsweeps = sweeps
555 : END IF
556 :
557 : END DO
558 :
559 0 : IF (PRESENT(c_zij_out)) THEN
560 0 : DO idim = 1, dim2
561 0 : CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
562 : END DO
563 : END IF
564 0 : CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
565 :
566 0 : DEALLOCATE (mii, mij, mjj)
567 0 : DO idim = 1, dim2
568 0 : CALL cp_cfm_release(c_zij_local(idim))
569 : END DO
570 0 : DEALLOCATE (c_zij_local)
571 0 : CALL cp_cfm_release(c_rmat_local)
572 :
573 0 : CALL timestop(handle)
574 :
575 0 : END SUBROUTINE jacobi_rotations_serial_1
576 : ! **************************************************************************************************
577 : !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
578 : !> for partially occupied wannier functions
579 : !> \param para_env ...
580 : !> \param weights ...
581 : !> \param zij ...
582 : !> \param vectors ...
583 : !> \param max_iter ...
584 : !> \param eps_localization ...
585 : !> \param iter ...
586 : !> \param out_each ...
587 : !> \param nextra ...
588 : !> \param do_cg ...
589 : !> \param nmo ...
590 : !> \param vectors_2 ...
591 : !> \param mos_guess ...
592 : ! **************************************************************************************************
593 2 : SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
594 : iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
595 : TYPE(mp_para_env_type), POINTER :: para_env
596 : REAL(KIND=dp), INTENT(IN) :: weights(:)
597 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
598 : INTEGER, INTENT(IN) :: max_iter
599 : REAL(KIND=dp), INTENT(IN) :: eps_localization
600 : INTEGER :: iter
601 : INTEGER, INTENT(IN) :: out_each, nextra
602 : LOGICAL, INTENT(IN) :: do_cg
603 : INTEGER, INTENT(IN), OPTIONAL :: nmo
604 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
605 :
606 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_cg_edf_ls'
607 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
608 : czero = (0.0_dp, 0.0_dp)
609 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
610 :
611 : COMPLEX(KIND=dp) :: cnorm2_Gct, cnorm2_Gct_cross, mzii
612 2 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
613 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
614 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
615 : INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
616 : lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
617 : INTEGER, DIMENSION(1) :: iloc
618 : LOGICAL :: do_cinit_mo, do_cinit_random, &
619 : do_U_guess_mo, new_direction
620 : REAL(KIND=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_Gct, &
621 : norm2_Gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
622 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
623 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
624 : REAL(KIND=dp), DIMENSION(50) :: energy, pos
625 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: tmp_arr
626 : TYPE(cp_blacs_env_type), POINTER :: context
627 : TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, Gct_old, &
628 : grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
629 : tmp_cfm_2, U, UL, V, VL, zdiag
630 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
631 : TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
632 : TYPE(cp_fm_type) :: id_nextra, matrix_U, matrix_V, &
633 : matrix_V_all, rmat, tmp_fm, vectors_all
634 :
635 2 : CALL timeset(routineN, handle)
636 :
637 2 : dim2 = SIZE(zij, 2)
638 2 : NULLIFY (context)
639 2 : NULLIFY (matrix_zii, arr_zii)
640 2 : NULLIFY (tmp_fm_struct)
641 2 : NULLIFY (tmp_arr)
642 :
643 12 : ALLOCATE (c_zij(dim2))
644 :
645 2 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
646 :
647 6 : ALLOCATE (sum_spread(nstate))
648 8 : ALLOCATE (matrix_zii(nstate, dim2))
649 266 : matrix_zii = czero
650 88 : sum_spread = 0.0_dp
651 :
652 : alpha = 0.0_dp
653 8 : DO idim = 1, dim2
654 6 : alpha = alpha + weights(idim)
655 6 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
656 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
657 5813 : zij(2, idim)%local_data, dp)
658 : END DO
659 :
660 10 : ALLOCATE (zij_0(dim2))
661 :
662 2 : CALL cp_cfm_create(U, zij(1, 1)%matrix_struct)
663 2 : CALL cp_fm_create(matrix_U, zij(1, 1)%matrix_struct)
664 :
665 2 : CALL cp_cfm_set_all(U, czero, cone)
666 2 : CALL cp_fm_set_all(matrix_U, 0.0_dp, 1.0_dp)
667 :
668 2 : CALL cp_fm_get_info(vectors, nrow_global=nao)
669 2 : IF (nextra > 0) THEN
670 2 : IF (PRESENT(mos_guess)) THEN
671 2 : do_cinit_random = .FALSE.
672 2 : do_cinit_mo = .TRUE.
673 2 : CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
674 : ELSE
675 0 : do_cinit_random = .TRUE.
676 0 : do_cinit_mo = .FALSE.
677 0 : ndummy = nstate
678 : END IF
679 :
680 : IF (do_cinit_random) THEN
681 : icinit = 1
682 : do_U_guess_mo = .FALSE.
683 : ELSEIF (do_cinit_mo) THEN
684 2 : icinit = 2
685 2 : do_U_guess_mo = .TRUE.
686 : END IF
687 :
688 2 : nocc = nstate - nextra
689 2 : northo = nmo - nocc
690 2 : norextra = nmo - nstate
691 2 : CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
692 :
693 8 : ALLOCATE (tmp_cmat(nstate, nstate))
694 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
695 2 : para_env=para_env, context=context)
696 8 : DO idim = 1, dim2
697 6 : CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
698 6 : CALL cp_cfm_set_all(zij_0(idim), czero, cone)
699 6 : CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
700 8 : CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
701 : END DO
702 2 : CALL cp_fm_struct_release(tmp_fm_struct)
703 2 : DEALLOCATE (tmp_cmat)
704 :
705 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
706 2 : para_env=para_env, context=context)
707 2 : CALL cp_cfm_create(V, tmp_fm_struct)
708 2 : CALL cp_fm_create(matrix_V, tmp_fm_struct)
709 2 : CALL cp_cfm_create(zdiag, tmp_fm_struct)
710 2 : CALL cp_fm_create(rmat, tmp_fm_struct)
711 2 : CALL cp_fm_struct_release(tmp_fm_struct)
712 2 : CALL cp_cfm_set_all(V, czero, cone)
713 2 : CALL cp_fm_set_all(matrix_V, 0.0_dp, 1.0_dp)
714 :
715 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
716 2 : para_env=para_env, context=context)
717 2 : CALL cp_fm_create(matrix_V_all, tmp_fm_struct)
718 2 : CALL cp_fm_struct_release(tmp_fm_struct)
719 2 : CALL cp_fm_set_all(matrix_V_all, 0._dp, 1._dp)
720 :
721 6 : ALLOCATE (arr_zii(nstate))
722 :
723 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
724 2 : para_env=para_env, context=context)
725 2 : CALL cp_cfm_create(c_tilde, tmp_fm_struct)
726 2 : CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
727 2 : CALL cp_cfm_create(Gct_old, tmp_fm_struct)
728 2 : CALL cp_cfm_create(skc, tmp_fm_struct)
729 2 : CALL cp_fm_struct_release(tmp_fm_struct)
730 2 : CALL cp_cfm_set_all(c_tilde, czero)
731 2 : CALL cp_cfm_set_all(Gct_old, czero)
732 2 : CALL cp_cfm_set_all(skc, czero)
733 :
734 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
735 2 : para_env=para_env, context=context)
736 2 : CALL cp_cfm_create(VL, tmp_fm_struct)
737 2 : CALL cp_cfm_set_all(VL, czero)
738 2 : CALL cp_fm_struct_release(tmp_fm_struct)
739 :
740 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
741 2 : para_env=para_env, context=context)
742 2 : CALL cp_fm_create(id_nextra, tmp_fm_struct)
743 2 : CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
744 2 : CALL cp_fm_struct_release(tmp_fm_struct)
745 2 : CALL cp_cfm_set_all(ctrans_lambda, czero)
746 2 : CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
747 :
748 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
749 2 : para_env=para_env, context=context)
750 2 : CALL cp_cfm_create(UL, tmp_fm_struct)
751 2 : CALL cp_fm_struct_release(tmp_fm_struct)
752 2 : CALL cp_cfm_set_all(UL, czero)
753 :
754 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
755 2 : para_env=para_env, context=context)
756 2 : CALL cp_fm_create(vectors_all, tmp_fm_struct)
757 2 : CALL cp_fm_struct_release(tmp_fm_struct)
758 8 : ALLOCATE (tmp_mat(nao, nstate))
759 2 : CALL cp_fm_get_submatrix(vectors, tmp_mat)
760 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
761 2 : DEALLOCATE (tmp_mat)
762 8 : ALLOCATE (tmp_mat(nao, norextra))
763 2 : CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
764 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
765 2 : DEALLOCATE (tmp_mat)
766 :
767 : ! initialize c_tilde
768 14 : SELECT CASE (icinit)
769 : CASE (1) ! random coefficients
770 : !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
771 0 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
772 0 : CALL cp_fm_init_random(tmp_fm, nextra)
773 0 : CALL ortho_vectors(tmp_fm)
774 0 : c_tilde%local_data = tmp_fm%local_data
775 0 : CALL cp_fm_release(tmp_fm)
776 0 : ALLOCATE (tmp_cmat(northo, nextra))
777 0 : CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
778 0 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
779 0 : DEALLOCATE (tmp_cmat)
780 : CASE (2) ! MO based coeffs
781 2 : CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_V_all)
782 6 : ALLOCATE (tmp_arr(nmo))
783 8 : ALLOCATE (tmp_mat(nmo, ndummy))
784 8 : ALLOCATE (tmp_mat_1(nmo, nstate))
785 : ! normalize matrix_V_all
786 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat)
787 88 : DO istate = 1, ndummy
788 4558 : tmp_arr(:) = tmp_mat(:, istate)
789 4558 : norm = norm2(tmp_arr)
790 4558 : tmp_arr(:) = tmp_arr(:)/norm
791 4560 : tmp_mat(:, istate) = tmp_arr(:)
792 : END DO
793 2 : CALL cp_fm_set_submatrix(matrix_V_all, tmp_mat)
794 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat_1, 1, 1, nmo, nstate)
795 2 : CALL cp_fm_set_submatrix(matrix_V, tmp_mat_1)
796 2 : DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
797 2 : CALL cp_fm_to_cfm(msourcer=matrix_V, mtarget=V)
798 8 : ALLOCATE (tmp_mat(northo, ndummy))
799 8 : ALLOCATE (tmp_mat_1(northo, nextra))
800 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat, nocc + 1, 1, northo, ndummy)
801 6 : ALLOCATE (tmp_arr(ndummy))
802 88 : tmp_arr = 0.0_dp
803 88 : DO istate = 1, ndummy
804 1034 : tmp_arr(istate) = norm2(tmp_mat(:, istate))
805 : END DO
806 : ! find edfs
807 6 : DO istate = 1, nextra
808 180 : iloc = MAXLOC(tmp_arr)
809 48 : tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
810 6 : tmp_arr(iloc(1)) = 0.0_dp
811 : END DO
812 :
813 2 : DEALLOCATE (tmp_arr, tmp_mat)
814 :
815 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
816 2 : para_env=para_env, context=context)
817 2 : CALL cp_fm_create(tmp_fm, tmp_fm_struct)
818 2 : CALL cp_fm_struct_release(tmp_fm_struct)
819 2 : CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
820 2 : DEALLOCATE (tmp_mat_1)
821 2 : CALL ortho_vectors(tmp_fm)
822 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
823 2 : CALL cp_fm_release(tmp_fm)
824 : ! initialize U
825 2 : IF (do_U_guess_mo) THEN
826 8 : ALLOCATE (tmp_cmat(nocc, nstate))
827 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
828 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
829 2 : DEALLOCATE (tmp_cmat)
830 8 : ALLOCATE (tmp_cmat(northo, nstate))
831 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
832 2 : CALL cp_cfm_set_submatrix(VL, tmp_cmat, 1, 1, northo, nstate)
833 2 : DEALLOCATE (tmp_cmat)
834 2 : CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, VL, czero, UL)
835 8 : ALLOCATE (tmp_cmat(nextra, nstate))
836 2 : CALL cp_cfm_get_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
837 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
838 2 : DEALLOCATE (tmp_cmat)
839 2 : CALL cp_fm_create(tmp_fm, U%matrix_struct)
840 1937 : tmp_fm%local_data = REAL(U%local_data, KIND=dp)
841 2 : CALL ortho_vectors(tmp_fm)
842 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=U)
843 2 : CALL cp_fm_release(tmp_fm)
844 4 : CALL cp_cfm_to_fm(U, matrix_U)
845 : END IF
846 : ! reevaluate V
847 8 : ALLOCATE (tmp_cmat(nocc, nstate))
848 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
849 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
850 2 : DEALLOCATE (tmp_cmat)
851 8 : ALLOCATE (tmp_cmat(nextra, nstate))
852 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
853 2 : CALL cp_cfm_set_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
854 2 : DEALLOCATE (tmp_cmat)
855 2 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
856 8 : ALLOCATE (tmp_cmat(northo, nstate))
857 2 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
858 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
859 6 : DEALLOCATE (tmp_cmat)
860 : END SELECT
861 : ELSE
862 0 : DO idim = 1, dim2
863 0 : CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
864 0 : CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
865 : END DO
866 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
867 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
868 : END IF
869 :
870 2 : unit_nr = -1
871 2 : IF (rmat%matrix_struct%para_env%is_source()) THEN
872 1 : unit_nr = cp_logger_get_default_unit_nr()
873 1 : WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
874 : END IF
875 :
876 2 : norm2_old = 1.0E30_dp
877 2 : ds_min = 1.0_dp
878 2 : new_direction = .TRUE.
879 2 : iter = 0
880 2 : line_searches = 0
881 2 : line_search_count = 0
882 2 : tol = 1.0E+20_dp
883 2 : mintol = 1.0E+10_dp
884 2 : miniter = 0
885 :
886 : !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
887 : ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
888 :
889 : ! do conjugate gradient until converged
890 34 : DO WHILE (iter < max_iter)
891 34 : iter = iter + 1
892 : !WRITE(*,*) 'iter = ', iter
893 34 : t1 = m_walltime()
894 :
895 34 : IF (iter > 1) THEN
896 : ! comput U
897 32 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
898 32 : CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
899 32 : IF (para_env%num_pe == 1) THEN
900 0 : CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
901 : ELSE
902 32 : CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
903 : END IF
904 32 : CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, U, tmp_cfm_2, czero, tmp_cfm)
905 32 : CALL cp_cfm_to_cfm(tmp_cfm, U)
906 32 : CALL cp_cfm_release(tmp_cfm)
907 32 : CALL cp_cfm_release(tmp_cfm_2)
908 : END IF
909 :
910 34 : IF (nextra > 0) THEN
911 136 : ALLOCATE (tmp_cmat(nextra, nstate))
912 34 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
913 34 : CALL cp_cfm_set_submatrix(UL, tmp_cmat)
914 34 : DEALLOCATE (tmp_cmat)
915 34 : IF (iter > 1) THEN
916 : ! orthonormalize c_tilde
917 32 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
918 480 : tmp_fm%local_data = REAL(c_tilde%local_data, KIND=dp)
919 32 : CALL ortho_vectors(tmp_fm)
920 32 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
921 32 : CALL cp_fm_release(tmp_fm)
922 :
923 128 : ALLOCATE (tmp_cmat(nocc, nstate))
924 32 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
925 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
926 32 : DEALLOCATE (tmp_cmat)
927 32 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
928 128 : ALLOCATE (tmp_cmat(northo, nstate))
929 32 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
930 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
931 64 : DEALLOCATE (tmp_cmat)
932 : END IF
933 :
934 : ! reset if new_direction
935 34 : IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN
936 0 : CALL cp_cfm_set_all(skc, czero)
937 0 : CALL cp_cfm_set_all(Gct_old, czero)
938 0 : norm2_old = 1.0E30_dp
939 : END IF
940 :
941 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
942 34 : CALL cp_cfm_to_cfm(V, tmp_cfm)
943 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
944 68 : ndummy = nmo
945 : ELSE
946 0 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
947 0 : CALL cp_cfm_to_cfm(U, tmp_cfm)
948 0 : CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
949 0 : ndummy = nstate
950 : END IF
951 : ! update z_ij
952 136 : DO idim = 1, dim2
953 : ! 'tmp_cfm_1 = zij_0*tmp_cfm'
954 : CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
955 102 : tmp_cfm, czero, tmp_cfm_1)
956 : ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
957 : CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
958 136 : czero, c_zij(idim))
959 : END DO
960 34 : CALL cp_cfm_release(tmp_cfm)
961 34 : CALL cp_cfm_release(tmp_cfm_1)
962 : ! compute spread
963 1496 : DO istate = 1, nstate
964 1462 : spread_ii = 0.0_dp
965 5848 : DO idim = 1, dim2
966 4386 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
967 : spread_ii = spread_ii + weights(idim)* &
968 4386 : ABS(mzii)**2/twopi/twopi
969 5848 : matrix_zii(istate, idim) = mzii
970 : END DO
971 : !WRITE(*,*) 'spread_ii', spread_ii
972 1496 : sum_spread(istate) = spread_ii
973 : END DO
974 34 : CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
975 34 : spread_sum = accurate_sum(sum_spread)
976 :
977 34 : IF (nextra > 0) THEN
978 : ! update c_tilde
979 34 : CALL cp_cfm_set_all(zdiag, czero)
980 34 : CALL cp_cfm_set_all(grad_ctilde, czero)
981 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
982 34 : CALL cp_cfm_set_all(tmp_cfm, czero)
983 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
984 34 : CALL cp_cfm_set_all(tmp_cfm_1, czero)
985 136 : ALLOCATE (tmp_cmat(northo, nstate))
986 136 : DO idim = 1, dim2
987 102 : weight = weights(idim)
988 8874 : arr_zii = matrix_zii(:, idim)
989 : ! tmp_cfm = zij_0*V
990 : CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
991 102 : zij_0(idim), V, czero, tmp_cfm)
992 : ! tmp_cfm = tmp_cfm*diag_zij_dagg
993 4488 : CALL cp_cfm_column_scale(tmp_cfm, CONJG(arr_zii))
994 : ! tmp_cfm_1 = tmp_cfm*U_dagg
995 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
996 102 : U, czero, tmp_cfm_1)
997 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
998 : ! zdiag = zdiag + tmp_cfm_1'
999 102 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1000 :
1001 : ! tmp_cfm = zij_0_dagg*V
1002 : CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1003 102 : zij_0(idim), V, czero, tmp_cfm)
1004 :
1005 : ! tmp_cfm = tmp_cfm*diag_zij
1006 102 : CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1007 : ! tmp_cfm_1 = tmp_cfm*U_dagg
1008 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1009 102 : U, czero, tmp_cfm_1)
1010 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
1011 : ! zdiag = zdiag + tmp_cfm_1'
1012 136 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1013 : END DO ! idim
1014 34 : CALL cp_cfm_release(tmp_cfm)
1015 34 : CALL cp_cfm_release(tmp_cfm_1)
1016 34 : DEALLOCATE (tmp_cmat)
1017 136 : ALLOCATE (tmp_cmat(northo, nextra))
1018 : CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1019 34 : northo, nextra, .FALSE.)
1020 : ! 'grad_ctilde'
1021 34 : CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1022 34 : DEALLOCATE (tmp_cmat)
1023 : ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1024 34 : CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1025 : !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1026 : ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1027 102 : CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1028 : END IF ! nextra > 0
1029 :
1030 : ! tolerance
1031 34 : IF (nextra > 0) THEN
1032 : tolc = 0.0_dp
1033 34 : CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1034 34 : CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1035 34 : CALL cp_fm_maxabsval(tmp_fm, tolc)
1036 34 : CALL cp_fm_release(tmp_fm)
1037 : !WRITE(*,*) 'tolc = ', tolc
1038 34 : tol = tol + tolc
1039 : END IF
1040 : !WRITE(*,*) 'tol = ', tol
1041 :
1042 36 : IF (nextra > 0) THEN
1043 : !WRITE(*,*) 'new_direction: ', new_direction
1044 34 : IF (new_direction) THEN
1045 6 : line_searches = line_searches + 1
1046 6 : IF (mintol > tol) THEN
1047 4 : mintol = tol
1048 4 : miniter = iter
1049 : END IF
1050 :
1051 6 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1052 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1053 0 : avg_spread_ii = sum_spread_ii/nstate
1054 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1055 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1056 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1057 0 : iter, avg_spread_ii, tol
1058 0 : CALL m_flush(unit_nr)
1059 : END IF
1060 6 : IF (tol < eps_localization) EXIT
1061 :
1062 4 : IF (do_cg) THEN
1063 : cnorm2_Gct = czero
1064 : cnorm2_Gct_cross = czero
1065 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct_cross)
1066 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1067 60 : Gct_old%local_data = grad_ctilde%local_data
1068 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct)
1069 4 : norm2_Gct = REAL(cnorm2_Gct, KIND=dp)
1070 : ! compute beta_pr
1071 4 : beta_pr = (norm2_Gct - norm2_Gct_cross)/norm2_old
1072 4 : norm2_old = norm2_Gct
1073 4 : beta = MAX(0.0_dp, beta_pr)
1074 : !WRITE(*,*) 'beta = ', beta
1075 : ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1076 4 : CALL cp_cfm_scale(beta, skc)
1077 4 : CALL cp_cfm_scale_and_add(cone, skc, cone, Gct_old)
1078 4 : CALL cp_cfm_trace(skc, Gct_old, cnorm2_Gct_cross)
1079 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1080 4 : IF (norm2_Gct_cross .LE. 0.0_dp) THEN ! back to steepest ascent
1081 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, Gct_old)
1082 : END IF
1083 : ELSE
1084 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1085 : END IF
1086 : line_search_count = 0
1087 : END IF
1088 :
1089 32 : line_search_count = line_search_count + 1
1090 : !WRITE(*,*) 'line_search_count = ', line_search_count
1091 32 : energy(line_search_count) = spread_sum
1092 :
1093 : ! gold line search
1094 32 : new_direction = .FALSE.
1095 32 : IF (line_search_count .EQ. 1) THEN
1096 4 : lsl = 1
1097 4 : lsr = 0
1098 4 : lsm = 1
1099 4 : pos(1) = 0.0_dp
1100 4 : pos(2) = ds_min/gold_sec
1101 4 : ds = pos(2)
1102 : ELSE
1103 28 : IF (line_search_count .EQ. 50) THEN
1104 0 : IF (ABS(energy(line_search_count) - energy(line_search_count - 1)) < 1.0E-4_dp) THEN
1105 0 : CPWARN("Line search failed to converge properly")
1106 0 : ds_min = 0.1_dp
1107 0 : new_direction = .TRUE.
1108 : ds = pos(line_search_count)
1109 0 : line_search_count = 0
1110 : ELSE
1111 0 : CPABORT("No. of line searches exceeds 50")
1112 : END IF
1113 : ELSE
1114 28 : IF (lsr .EQ. 0) THEN
1115 28 : IF (energy(line_search_count - 1) .GT. energy(line_search_count)) THEN
1116 0 : lsr = line_search_count
1117 0 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1118 : ELSE
1119 28 : lsl = lsm
1120 28 : lsm = line_search_count
1121 28 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1122 : END IF
1123 : ELSE
1124 0 : IF (pos(line_search_count) .LT. pos(lsm)) THEN
1125 0 : IF (energy(line_search_count) .GT. energy(lsm)) THEN
1126 : lsr = lsm
1127 : lsm = line_search_count
1128 : ELSE
1129 0 : lsl = line_search_count
1130 : END IF
1131 : ELSE
1132 0 : IF (energy(line_search_count) .GT. energy(lsm)) THEN
1133 : lsl = lsm
1134 : lsm = line_search_count
1135 : ELSE
1136 0 : lsr = line_search_count
1137 : END IF
1138 : END IF
1139 0 : IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
1140 0 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1141 : ELSE
1142 0 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1143 : END IF
1144 0 : IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN
1145 0 : new_direction = .TRUE.
1146 : END IF
1147 : END IF ! lsr .eq. 0
1148 : END IF ! line_search_count .eq. 50
1149 : ! now go to the suggested point
1150 28 : ds = pos(line_search_count + 1) - pos(line_search_count)
1151 : !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1152 28 : IF ((ABS(ds) < 1.0E-10_dp) .AND. (lsl == 1)) THEN
1153 0 : new_direction = .TRUE.
1154 0 : ds_min = 0.5_dp/alpha
1155 28 : ELSEIF (ABS(ds) > 10.0_dp) THEN
1156 4 : new_direction = .TRUE.
1157 4 : ds_min = 0.5_dp/alpha
1158 : ELSE
1159 : ds_min = pos(line_search_count + 1)
1160 : END IF
1161 : END IF ! first step
1162 : ! 'c_tilde = c_tilde + d*skc'
1163 32 : CALL cp_cfm_scale(ds, skc)
1164 32 : CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1165 : ELSE
1166 0 : IF (mintol > tol) THEN
1167 0 : mintol = tol
1168 0 : miniter = iter
1169 : END IF
1170 0 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1171 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1172 0 : avg_spread_ii = sum_spread_ii/nstate
1173 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1174 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1175 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1176 0 : iter, avg_spread_ii, tol
1177 0 : CALL m_flush(unit_nr)
1178 : END IF
1179 0 : IF (tol < eps_localization) EXIT
1180 : END IF ! nextra > 0
1181 :
1182 : END DO ! iteration
1183 :
1184 2 : IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1185 0 : WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1186 0 : WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1187 0 : CALL m_flush(unit_nr)
1188 : END IF
1189 :
1190 2 : CALL cp_cfm_to_fm(U, matrix_U)
1191 :
1192 2 : IF (nextra > 0) THEN
1193 2367 : rmat%local_data = REAL(V%local_data, KIND=dp)
1194 2 : CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1195 :
1196 2 : CALL cp_cfm_release(c_tilde)
1197 2 : CALL cp_cfm_release(grad_ctilde)
1198 2 : CALL cp_cfm_release(Gct_old)
1199 2 : CALL cp_cfm_release(skc)
1200 2 : CALL cp_cfm_release(UL)
1201 2 : CALL cp_cfm_release(zdiag)
1202 2 : CALL cp_cfm_release(ctrans_lambda)
1203 2 : CALL cp_fm_release(id_nextra)
1204 2 : CALL cp_fm_release(vectors_all)
1205 2 : CALL cp_cfm_release(V)
1206 2 : CALL cp_fm_release(matrix_V)
1207 2 : CALL cp_fm_release(matrix_V_all)
1208 2 : CALL cp_cfm_release(VL)
1209 2 : DEALLOCATE (arr_zii)
1210 : ELSE
1211 0 : rmat%local_data = matrix_U%local_data
1212 0 : CALL rotate_orbitals(rmat, vectors)
1213 : END IF
1214 8 : DO idim = 1, dim2
1215 8 : CALL cp_cfm_release(zij_0(idim))
1216 : END DO
1217 2 : DEALLOCATE (zij_0)
1218 :
1219 8 : DO idim = 1, dim2
1220 5811 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
1221 5811 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
1222 8 : CALL cp_cfm_release(c_zij(idim))
1223 : END DO
1224 2 : DEALLOCATE (c_zij)
1225 2 : CALL cp_fm_release(rmat)
1226 2 : CALL cp_cfm_release(U)
1227 2 : CALL cp_fm_release(matrix_U)
1228 2 : DEALLOCATE (matrix_zii, sum_spread)
1229 :
1230 2 : CALL timestop(handle)
1231 :
1232 10 : END SUBROUTINE jacobi_cg_edf_ls
1233 :
1234 : ! **************************************************************************************************
1235 : !> \brief ...
1236 : !> \param vmatrix ...
1237 : ! **************************************************************************************************
1238 108 : SUBROUTINE ortho_vectors(vmatrix)
1239 :
1240 : TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1241 :
1242 : CHARACTER(LEN=*), PARAMETER :: routineN = 'ortho_vectors'
1243 :
1244 : INTEGER :: handle, n, ncol
1245 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1246 : TYPE(cp_fm_type) :: overlap_vv
1247 :
1248 36 : CALL timeset(routineN, handle)
1249 :
1250 36 : NULLIFY (fm_struct_tmp)
1251 :
1252 36 : CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1253 :
1254 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1255 : para_env=vmatrix%matrix_struct%para_env, &
1256 36 : context=vmatrix%matrix_struct%context)
1257 36 : CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1258 36 : CALL cp_fm_struct_release(fm_struct_tmp)
1259 :
1260 36 : CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1261 36 : CALL cp_fm_cholesky_decompose(overlap_vv)
1262 36 : CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.TRUE.)
1263 :
1264 36 : CALL cp_fm_release(overlap_vv)
1265 :
1266 36 : CALL timestop(handle)
1267 :
1268 36 : END SUBROUTINE ortho_vectors
1269 :
1270 : ! **************************************************************************************************
1271 : !> \brief ...
1272 : !> \param istate ...
1273 : !> \param jstate ...
1274 : !> \param st ...
1275 : !> \param ct ...
1276 : !> \param zij ...
1277 : ! **************************************************************************************************
1278 0 : SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1279 : INTEGER, INTENT(IN) :: istate, jstate
1280 : REAL(KIND=dp), INTENT(IN) :: st, ct
1281 : TYPE(cp_cfm_type) :: zij(:)
1282 :
1283 : INTEGER :: id
1284 :
1285 : ! Locals
1286 :
1287 0 : DO id = 1, SIZE(zij, 1)
1288 0 : CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1289 0 : CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1290 : END DO
1291 :
1292 0 : END SUBROUTINE rotate_zij
1293 : ! **************************************************************************************************
1294 : !> \brief ...
1295 : !> \param istate ...
1296 : !> \param jstate ...
1297 : !> \param st ...
1298 : !> \param ct ...
1299 : !> \param rmat ...
1300 : ! **************************************************************************************************
1301 0 : SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1302 : INTEGER, INTENT(IN) :: istate, jstate
1303 : REAL(KIND=dp), INTENT(IN) :: st, ct
1304 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
1305 :
1306 0 : CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1307 :
1308 0 : END SUBROUTINE rotate_rmat
1309 : ! **************************************************************************************************
1310 : !> \brief ...
1311 : !> \param mii ...
1312 : !> \param mjj ...
1313 : !> \param mij ...
1314 : !> \param weights ...
1315 : !> \param theta ...
1316 : !> \param grad_ij ...
1317 : !> \param step ...
1318 : ! **************************************************************************************************
1319 2213254 : SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1320 : COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1321 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1322 : REAL(KIND=dp), INTENT(OUT) :: theta
1323 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1324 :
1325 : COMPLEX(KIND=dp) :: z11, z12, z22
1326 : INTEGER :: dim_m, idim
1327 : REAL(KIND=dp) :: a12, b12, d2, ratio
1328 :
1329 2213254 : a12 = 0.0_dp
1330 2213254 : b12 = 0.0_dp
1331 2213254 : dim_m = SIZE(mii)
1332 8959510 : DO idim = 1, dim_m
1333 6746256 : z11 = mii(idim)
1334 6746256 : z22 = mjj(idim)
1335 6746256 : z12 = mij(idim)
1336 6746256 : a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
1337 : b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
1338 8959510 : 0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
1339 : END DO
1340 2213254 : IF (ABS(b12) > 1.e-10_dp) THEN
1341 2213254 : ratio = -a12/b12
1342 2213254 : theta = 0.25_dp*ATAN(ratio)
1343 0 : ELSEIF (ABS(b12) < 1.e-10_dp) THEN
1344 0 : b12 = 0.0_dp
1345 0 : theta = 0.0_dp
1346 : ELSE
1347 0 : theta = 0.25_dp*pi
1348 : END IF
1349 2213254 : IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1350 : ! Check second derivative info
1351 2213254 : d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
1352 2213254 : IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1353 3238 : IF (theta > 0.0_dp) THEN ! make theta as small as possible
1354 1602 : theta = theta - 0.25_dp*pi
1355 : ELSE
1356 1636 : theta = theta + 0.25_dp*pi
1357 : END IF
1358 : END IF
1359 2213254 : END SUBROUTINE get_angle
1360 : ! **************************************************************************************************
1361 : !> \brief ...
1362 : !> \param zij ...
1363 : !> \param weights ...
1364 : !> \param tolerance ...
1365 : !> \param grad ...
1366 : ! **************************************************************************************************
1367 0 : SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1368 : TYPE(cp_cfm_type) :: zij(:)
1369 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1370 : REAL(KIND=dp), INTENT(OUT) :: tolerance
1371 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1372 :
1373 : CHARACTER(len=*), PARAMETER :: routineN = 'check_tolerance'
1374 :
1375 : INTEGER :: handle
1376 : TYPE(cp_fm_type) :: force
1377 :
1378 0 : CALL timeset(routineN, handle)
1379 :
1380 : ! compute gradient at t=0
1381 :
1382 0 : CALL cp_fm_create(force, zij(1)%matrix_struct)
1383 0 : CALL cp_fm_set_all(force, 0._dp)
1384 0 : CALL grad_at_0(zij, weights, force)
1385 0 : CALL cp_fm_maxabsval(force, tolerance)
1386 0 : IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1387 0 : CALL cp_fm_release(force)
1388 :
1389 0 : CALL timestop(handle)
1390 :
1391 0 : END SUBROUTINE check_tolerance
1392 : ! **************************************************************************************************
1393 : !> \brief ...
1394 : !> \param rmat ...
1395 : !> \param vectors ...
1396 : ! **************************************************************************************************
1397 1048 : SUBROUTINE rotate_orbitals(rmat, vectors)
1398 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1399 :
1400 : INTEGER :: k, n
1401 : TYPE(cp_fm_type) :: wf
1402 :
1403 524 : CALL cp_fm_create(wf, vectors%matrix_struct)
1404 524 : CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1405 524 : CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1406 524 : CALL cp_fm_to_fm(wf, vectors)
1407 524 : CALL cp_fm_release(wf)
1408 524 : END SUBROUTINE rotate_orbitals
1409 : ! **************************************************************************************************
1410 : !> \brief ...
1411 : !> \param rmat ...
1412 : !> \param vec_all ...
1413 : !> \param vectors ...
1414 : ! **************************************************************************************************
1415 6 : SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1416 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1417 :
1418 : INTEGER :: k, l, n
1419 : TYPE(cp_fm_type) :: wf
1420 :
1421 2 : CALL cp_fm_create(wf, vectors%matrix_struct)
1422 2 : CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1423 2 : CALL cp_fm_get_info(rmat, ncol_global=l)
1424 :
1425 2 : CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1426 2 : CALL cp_fm_to_fm(wf, vectors)
1427 2 : CALL cp_fm_release(wf)
1428 2 : END SUBROUTINE rotate_orbitals_edf
1429 : ! **************************************************************************************************
1430 : !> \brief ...
1431 : !> \param diag ...
1432 : !> \param weights ...
1433 : !> \param matrix ...
1434 : !> \param ndim ...
1435 : ! **************************************************************************************************
1436 206 : SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1437 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1438 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1439 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1440 : INTEGER, INTENT(IN) :: ndim
1441 :
1442 : COMPLEX(KIND=dp) :: zii, zjj
1443 : INTEGER :: idim, istate, jstate, ncol_local, &
1444 : nrow_global, nrow_local
1445 206 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1446 : REAL(KIND=dp) :: gradsq_ij
1447 :
1448 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1449 : ncol_local=ncol_local, nrow_global=nrow_global, &
1450 206 : row_indices=row_indices, col_indices=col_indices)
1451 :
1452 618 : DO istate = 1, nrow_local
1453 2266 : DO jstate = 1, ncol_local
1454 : ! get real and imaginary parts
1455 1648 : gradsq_ij = 0.0_dp
1456 11536 : DO idim = 1, ndim
1457 9888 : zii = diag(row_indices(istate), idim)
1458 9888 : zjj = diag(col_indices(jstate), idim)
1459 : gradsq_ij = gradsq_ij + weights(idim)* &
1460 11536 : 4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
1461 : END DO
1462 2060 : matrix%local_data(istate, jstate) = gradsq_ij
1463 : END DO
1464 : END DO
1465 206 : END SUBROUTINE gradsq_at_0
1466 : ! **************************************************************************************************
1467 : !> \brief ...
1468 : !> \param matrix_p ...
1469 : !> \param weights ...
1470 : !> \param matrix ...
1471 : ! **************************************************************************************************
1472 0 : SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1473 : TYPE(cp_cfm_type) :: matrix_p(:)
1474 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1475 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1476 :
1477 : COMPLEX(KIND=dp) :: zii, zij, zjj
1478 0 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1479 : INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1480 : nrow_global, nrow_local
1481 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1482 : REAL(KIND=dp) :: grad_ij
1483 :
1484 0 : NULLIFY (diag)
1485 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1486 : ncol_local=ncol_local, nrow_global=nrow_global, &
1487 0 : row_indices=row_indices, col_indices=col_indices)
1488 0 : dim_m = SIZE(matrix_p, 1)
1489 0 : ALLOCATE (diag(nrow_global, dim_m))
1490 :
1491 0 : DO idim = 1, dim_m
1492 0 : DO istate = 1, nrow_global
1493 0 : CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1494 : END DO
1495 : END DO
1496 :
1497 0 : DO istate = 1, nrow_local
1498 0 : DO jstate = 1, ncol_local
1499 : ! get real and imaginary parts
1500 : grad_ij = 0.0_dp
1501 0 : DO idim = 1, dim_m
1502 0 : zii = diag(row_indices(istate), idim)
1503 0 : zjj = diag(col_indices(jstate), idim)
1504 0 : zij = matrix_p(idim)%local_data(istate, jstate)
1505 : grad_ij = grad_ij + weights(idim)* &
1506 0 : REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
1507 : END DO
1508 0 : matrix%local_data(istate, jstate) = grad_ij
1509 : END DO
1510 : END DO
1511 0 : DEALLOCATE (diag)
1512 0 : END SUBROUTINE grad_at_0
1513 :
1514 : ! return energy and maximum gradient in the current point
1515 : ! **************************************************************************************************
1516 : !> \brief ...
1517 : !> \param weights ...
1518 : !> \param zij ...
1519 : !> \param tolerance ...
1520 : !> \param value ...
1521 : ! **************************************************************************************************
1522 3196 : SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1523 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1524 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1525 : REAL(KIND=dp) :: tolerance, value
1526 :
1527 : COMPLEX(KIND=dp) :: kii, kij, kjj
1528 3196 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1529 : INTEGER :: idim, istate, jstate, ncol_local, &
1530 : nrow_global, nrow_local
1531 3196 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1532 : REAL(KIND=dp) :: grad_ij, ra, rb
1533 :
1534 3196 : NULLIFY (diag)
1535 : CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1536 : ncol_local=ncol_local, nrow_global=nrow_global, &
1537 3196 : row_indices=row_indices, col_indices=col_indices)
1538 12784 : ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1539 3196 : value = 0.0_dp
1540 12898 : DO idim = 1, SIZE(zij, 2)
1541 139714 : DO istate = 1, nrow_global
1542 126816 : CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1543 126816 : CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1544 126816 : diag(istate, idim) = CMPLX(ra, rb, dp)
1545 136518 : value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
1546 : END DO
1547 : END DO
1548 3196 : tolerance = 0.0_dp
1549 24237 : DO istate = 1, nrow_local
1550 368796 : DO jstate = 1, ncol_local
1551 1379661 : grad_ij = 0.0_dp
1552 1379661 : DO idim = 1, SIZE(zij, 2)
1553 1035102 : kii = diag(row_indices(istate), idim)
1554 1035102 : kjj = diag(col_indices(jstate), idim)
1555 1035102 : ra = zij(1, idim)%local_data(istate, jstate)
1556 1035102 : rb = zij(2, idim)%local_data(istate, jstate)
1557 1035102 : kij = CMPLX(ra, rb, dp)
1558 : grad_ij = grad_ij + weights(idim)* &
1559 1379661 : REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
1560 : END DO
1561 365600 : tolerance = MAX(ABS(grad_ij), tolerance)
1562 : END DO
1563 : END DO
1564 3196 : CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1565 :
1566 3196 : DEALLOCATE (diag)
1567 :
1568 3196 : END SUBROUTINE check_tolerance_new
1569 :
1570 : ! **************************************************************************************************
1571 : !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1572 : !> and rotates them all at the same time (hoping for the best of course)
1573 : !> \param weights ...
1574 : !> \param zij ...
1575 : !> \param vectors ...
1576 : !> \param max_iter ...
1577 : !> \param max_crazy_angle ...
1578 : !> \param crazy_scale ...
1579 : !> \param crazy_use_diag ...
1580 : !> \param eps_localization ...
1581 : !> \param iterations ...
1582 : !> \param converged ...
1583 : ! **************************************************************************************************
1584 100 : SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1585 : eps_localization, iterations, converged)
1586 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1587 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1588 : INTEGER, INTENT(IN) :: max_iter
1589 : REAL(KIND=dp), INTENT(IN) :: max_crazy_angle
1590 : REAL(KIND=dp) :: crazy_scale
1591 : LOGICAL :: crazy_use_diag
1592 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1593 : INTEGER :: iterations
1594 : LOGICAL, INTENT(out), OPTIONAL :: converged
1595 :
1596 : CHARACTER(len=*), PARAMETER :: routineN = 'crazy_rotations'
1597 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1598 : czero = (0.0_dp, 0.0_dp)
1599 :
1600 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1601 100 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1602 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1603 : INTEGER :: dim2, handle, i, icol, idim, irow, &
1604 : method, ncol_global, ncol_local, &
1605 : norder, nrow_global, nrow_local, &
1606 : nsquare, unit_nr
1607 100 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1608 : LOGICAL :: do_emd
1609 : REAL(KIND=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1610 : norm, ra, rb, theta, tolerance, value
1611 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals
1612 : TYPE(cp_cfm_type) :: cmat_A, cmat_R, cmat_t1
1613 : TYPE(cp_fm_type) :: mat_R, mat_t, mat_theta, mat_U
1614 :
1615 100 : CALL timeset(routineN, handle)
1616 100 : NULLIFY (row_indices, col_indices)
1617 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1618 : ncol_global=ncol_global, &
1619 : row_indices=row_indices, col_indices=col_indices, &
1620 100 : nrow_local=nrow_local, ncol_local=ncol_local)
1621 :
1622 100 : limit_crazy_angle = max_crazy_angle
1623 :
1624 100 : NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1625 100 : dim2 = SIZE(zij, 2)
1626 400 : ALLOCATE (diag_z(nrow_global, dim2))
1627 300 : ALLOCATE (evals(nrow_global))
1628 300 : ALLOCATE (evals_exp(nrow_global))
1629 :
1630 100 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1631 100 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1632 100 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1633 :
1634 100 : CALL cp_fm_create(mat_U, zij(1, 1)%matrix_struct)
1635 100 : CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1636 100 : CALL cp_fm_create(mat_R, zij(1, 1)%matrix_struct)
1637 :
1638 100 : CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1639 :
1640 100 : CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
1641 100 : CALL cp_fm_set_all(mat_t, 0.0_dp)
1642 500 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1643 406 : DO idim = 1, dim2
1644 306 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1645 406 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1646 : END DO
1647 100 : CALL cp_fm_syevd(mat_t, mat_U, evals)
1648 406 : DO idim = 1, dim2
1649 : ! rotate z's
1650 306 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1651 306 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1652 306 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1653 406 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1654 : END DO
1655 : ! collect rotation matrix
1656 100 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1657 100 : CALL cp_fm_to_fm(mat_t, mat_R)
1658 :
1659 100 : unit_nr = -1
1660 100 : IF (cmat_A%matrix_struct%para_env%is_source()) THEN
1661 50 : unit_nr = cp_logger_get_default_unit_nr()
1662 : WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1663 50 : "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1664 : END IF
1665 :
1666 100 : iterations = 0
1667 100 : tolerance = 1.0_dp
1668 :
1669 : DO
1670 3196 : iterations = iterations + 1
1671 12898 : DO idim = 1, dim2
1672 139714 : DO i = 1, nrow_global
1673 126816 : CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1674 126816 : CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1675 136518 : diag_z(i, idim) = CMPLX(ra, rb, dp)
1676 : END DO
1677 : END DO
1678 24237 : DO irow = 1, nrow_local
1679 368796 : DO icol = 1, ncol_local
1680 1379661 : DO idim = 1, dim2
1681 1035102 : ra = zij(1, idim)%local_data(irow, icol)
1682 1035102 : rb = zij(2, idim)%local_data(irow, icol)
1683 1035102 : mij(idim) = CMPLX(ra, rb, dp)
1684 1035102 : mii(idim) = diag_z(row_indices(irow), idim)
1685 1379661 : mjj(idim) = diag_z(col_indices(icol), idim)
1686 : END DO
1687 365600 : IF (row_indices(irow) .NE. col_indices(icol)) THEN
1688 323518 : CALL get_angle(mii, mjj, mij, weights, theta)
1689 323518 : theta = crazy_scale*theta
1690 323518 : IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1691 323518 : IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1692 323518 : IF (crazy_use_diag) THEN
1693 0 : cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
1694 : ELSE
1695 323518 : mat_theta%local_data(irow, icol) = -theta
1696 : END IF
1697 : ELSE
1698 21041 : IF (crazy_use_diag) THEN
1699 0 : cmat_A%local_data(irow, icol) = czero
1700 : ELSE
1701 21041 : mat_theta%local_data(irow, icol) = 0.0_dp
1702 : END IF
1703 : END IF
1704 : END DO
1705 : END DO
1706 :
1707 : ! construct rotation matrix U based on A using diagonalization
1708 : ! alternatively, exp based on repeated squaring could be faster
1709 3196 : IF (crazy_use_diag) THEN
1710 0 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1711 0 : maxeval = MAXVAL(ABS(evals))
1712 0 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1713 0 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1714 0 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1715 : CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1716 0 : cmat_t1, cmat_R, czero, cmat_A)
1717 0 : mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
1718 : ELSE
1719 3196 : do_emd = .FALSE.
1720 3196 : method = 2
1721 3196 : eps_exp = 1.0_dp*EPSILON(eps_exp)
1722 3196 : CALL cp_fm_maxabsrownorm(mat_theta, norm)
1723 3196 : maxeval = norm ! an upper bound
1724 3196 : CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1725 3196 : CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
1726 : END IF
1727 :
1728 12898 : DO idim = 1, dim2
1729 : ! rotate z's
1730 9702 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1731 9702 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1732 9702 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1733 12898 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1734 : END DO
1735 : ! collect rotation matrix
1736 3196 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1737 3196 : CALL cp_fm_to_fm(mat_t, mat_R)
1738 :
1739 3196 : CALL check_tolerance_new(weights, zij, tolerance, value)
1740 :
1741 3196 : IF (unit_nr > 0) THEN
1742 : WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1743 1598 : "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1744 1598 : CALL m_flush(unit_nr)
1745 : END IF
1746 3196 : IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
1747 : END DO
1748 :
1749 100 : IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1750 :
1751 100 : CALL cp_cfm_release(cmat_A)
1752 100 : CALL cp_cfm_release(cmat_R)
1753 100 : CALL cp_cfm_release(cmat_T1)
1754 :
1755 100 : CALL cp_fm_release(mat_U)
1756 100 : CALL cp_fm_release(mat_T)
1757 100 : CALL cp_fm_release(mat_theta)
1758 :
1759 100 : CALL rotate_orbitals(mat_R, vectors)
1760 :
1761 100 : CALL cp_fm_release(mat_R)
1762 100 : DEALLOCATE (evals_exp, evals, diag_z)
1763 100 : DEALLOCATE (mii, mij, mjj)
1764 :
1765 100 : CALL timestop(handle)
1766 :
1767 200 : END SUBROUTINE crazy_rotations
1768 :
1769 : ! **************************************************************************************************
1770 : !> \brief use the exponential parametrization as described in to perform a direct mini
1771 : !> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1772 : !> none of the input is modified for the time being, just finds the rotations
1773 : !> that minimizes, and throws it away afterwards :-)
1774 : !> apart from being expensive and not cleaned, this works fine
1775 : !> useful to try different spread functionals
1776 : !> \param weights ...
1777 : !> \param zij ...
1778 : !> \param vectors ...
1779 : !> \param max_iter ...
1780 : !> \param eps_localization ...
1781 : !> \param iterations ...
1782 : ! **************************************************************************************************
1783 2 : SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1784 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1785 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1786 : INTEGER, INTENT(IN) :: max_iter
1787 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1788 : INTEGER :: iterations
1789 :
1790 : CHARACTER(len=*), PARAMETER :: routineN = 'direct_mini'
1791 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1792 : czero = (0.0_dp, 0.0_dp)
1793 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
1794 :
1795 : COMPLEX(KIND=dp) :: lk, ll, tmp
1796 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1797 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1798 : INTEGER :: handle, i, icol, idim, irow, &
1799 : line_search_count, line_searches, lsl, &
1800 : lsm, lsr, n, ncol_local, ndim, &
1801 : nrow_local, output_unit
1802 2 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1803 : LOGICAL :: new_direction
1804 : REAL(KIND=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1805 : fb, fc, nom, normg, normg_cross, &
1806 : normg_old, npos, omega, tol, val, x0, &
1807 : x1, xa, xb, xc
1808 : REAL(KIND=dp), DIMENSION(150) :: energy, grad, pos
1809 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1810 : TYPE(cp_cfm_type) :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
1811 : cmat_t2, cmat_U
1812 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1813 : TYPE(cp_fm_type) :: matrix_A, matrix_G, matrix_G_old, &
1814 : matrix_G_search, matrix_H, matrix_R, &
1815 : matrix_T
1816 :
1817 2 : NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1818 :
1819 2 : CALL timeset(routineN, handle)
1820 2 : output_unit = cp_logger_get_default_io_unit()
1821 :
1822 2 : n = zij(1, 1)%matrix_struct%nrow_global
1823 2 : ndim = (SIZE(zij, 2))
1824 :
1825 2 : IF (output_unit > 0) THEN
1826 1 : WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1827 1 : WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1828 : END IF
1829 :
1830 20 : ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1831 18 : ALLOCATE (c_zij(ndim))
1832 :
1833 : ! create the three complex matrices Z
1834 14 : DO idim = 1, ndim
1835 12 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1836 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
1837 158 : zij(2, idim)%local_data, dp)
1838 : END DO
1839 :
1840 2 : CALL cp_fm_create(matrix_A, zij(1, 1)%matrix_struct)
1841 2 : CALL cp_fm_create(matrix_G, zij(1, 1)%matrix_struct)
1842 2 : CALL cp_fm_create(matrix_T, zij(1, 1)%matrix_struct)
1843 2 : CALL cp_fm_create(matrix_H, zij(1, 1)%matrix_struct)
1844 2 : CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix_struct)
1845 2 : CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix_struct)
1846 2 : CALL cp_fm_create(matrix_R, zij(1, 1)%matrix_struct)
1847 2 : CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
1848 :
1849 2 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1850 : ! CALL cp_fm_init_random ( matrix_A )
1851 :
1852 2 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1853 2 : CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix_struct)
1854 2 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1855 2 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1856 2 : CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1857 2 : CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix_struct)
1858 2 : CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix_struct)
1859 :
1860 : CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
1861 2 : row_indices=row_indices, col_indices=col_indices)
1862 :
1863 2 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1864 2 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1865 2 : normg_old = 1.0E30_dp
1866 2 : ds_min = 1.0_dp
1867 2 : new_direction = .TRUE.
1868 2 : Iterations = 0
1869 2 : line_searches = 0
1870 2 : line_search_count = 0
1871 206 : DO
1872 206 : iterations = iterations + 1
1873 : ! compute U,R,evals given A
1874 2678 : cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
1875 206 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1876 1030 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1877 206 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1878 206 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1879 206 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
1880 2678 : cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
1881 :
1882 206 : IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
1883 0 : DO idim = 1, ndim
1884 0 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1)
1885 0 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim))
1886 : END DO
1887 : ! collect rotation matrix
1888 0 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
1889 0 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
1890 0 : CALL cp_fm_to_fm(matrix_T, matrix_R)
1891 :
1892 0 : CALL cp_cfm_set_all(cmat_U, czero, cone)
1893 0 : CALL cp_cfm_set_all(cmat_R, czero, cone)
1894 0 : CALL cp_cfm_set_all(cmat_A, czero)
1895 0 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1896 0 : evals(:) = 0.0_dp
1897 0 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1898 0 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1899 0 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1900 0 : normg_old = 1.0E30_dp
1901 : END IF
1902 :
1903 : ! compute Omega and M
1904 206 : CALL cp_cfm_set_all(cmat_M, czero)
1905 206 : omega = 0.0_dp
1906 1442 : DO idim = 1, ndim
1907 1236 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1) ! t1=ZU
1908 1236 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1909 6180 : DO i = 1, n
1910 4944 : CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1911 : SELECT CASE (2) ! allows for selection of different spread functionals
1912 : CASE (1)
1913 : fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
1914 : fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
1915 : CASE (2) ! corresponds to the jacobi setup
1916 4944 : fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
1917 4944 : fvald(i) = -weights(idim)
1918 : END SELECT
1919 6180 : omega = omega + fval(i)
1920 : END DO
1921 6386 : DO icol = 1, ncol_local
1922 16068 : DO irow = 1, nrow_local
1923 9888 : tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
1924 : cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
1925 14832 : + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
1926 : END DO
1927 : END DO
1928 : END DO
1929 :
1930 : ! compute Hessian diagonal approximation for the preconditioner
1931 : IF (.TRUE.) THEN
1932 206 : CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
1933 : ELSE
1934 : CALL cp_fm_set_all(matrix_H, 1.0_dp)
1935 : END IF
1936 :
1937 : ! compute B
1938 1030 : DO icol = 1, ncol_local
1939 2678 : DO irow = 1, nrow_local
1940 1648 : ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1941 1648 : lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1942 2472 : IF (ABS(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1943 628 : tmp = 1.0_dp
1944 628 : cmat_B%local_data(irow, icol) = 0.0_dp
1945 10676 : DO i = 1, 16
1946 10048 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
1947 10676 : tmp = tmp*(ll - lk)/(i + 1)
1948 : END DO
1949 628 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
1950 : ELSE
1951 1020 : cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
1952 : END IF
1953 : END DO
1954 : END DO
1955 : ! compute gradient matrix_G
1956 :
1957 206 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
1958 206 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1959 206 : CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
1960 206 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
1961 206 : CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
1962 2678 : matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
1963 206 : CALL cp_fm_transpose(matrix_G, matrix_T)
1964 206 : CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
1965 206 : CALL cp_fm_maxabsval(matrix_G, tol)
1966 :
1967 : ! from here on, minimizing technology
1968 206 : IF (new_direction) THEN
1969 : ! energy converged up to machine precision ?
1970 10 : line_searches = line_searches + 1
1971 : ! DO i=1,line_search_count
1972 : ! write(15,*) pos(i),energy(i)
1973 : ! ENDDO
1974 : ! write(15,*) ""
1975 : ! CALL m_flush(15)
1976 : !write(16,*) evals(:)
1977 : !write(17,*) matrix_A%local_data(:,:)
1978 : !write(18,*) matrix_G%local_data(:,:)
1979 10 : IF (output_unit > 0) THEN
1980 5 : WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
1981 5 : CALL m_flush(output_unit)
1982 : END IF
1983 10 : IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1984 :
1985 : IF (.TRUE.) THEN ! do conjugate gradient CG
1986 8 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
1987 8 : normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1988 : ! apply the preconditioner
1989 40 : DO icol = 1, ncol_local
1990 104 : DO irow = 1, nrow_local
1991 96 : matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
1992 : END DO
1993 : END DO
1994 8 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
1995 8 : normg = normg*0.5_dp
1996 8 : beta_pr = (normg - normg_cross)/normg_old
1997 8 : normg_old = normg
1998 8 : beta_pr = MAX(beta_pr, 0.0_dp)
1999 8 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
2000 8 : CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
2001 8 : IF (normg_cross .GE. 0) THEN ! back to SD
2002 0 : IF (matrix_A%matrix_struct%para_env%is_source()) THEN
2003 0 : WRITE (cp_logger_get_default_unit_nr(), *) "!"
2004 : END IF
2005 0 : beta_pr = 0.0_dp
2006 0 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
2007 : END IF
2008 : ELSE ! SD
2009 : CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
2010 : END IF
2011 : ! ds_min=1.0E-4_dp
2012 : line_search_count = 0
2013 : END IF
2014 204 : line_search_count = line_search_count + 1
2015 204 : energy(line_search_count) = Omega
2016 :
2017 : ! line search section
2018 : SELECT CASE (3)
2019 : CASE (1) ! two point line search
2020 : SELECT CASE (line_search_count)
2021 : CASE (1)
2022 : pos(1) = 0.0_dp
2023 : pos(2) = ds_min
2024 : CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
2025 : grad(1) = grad(1)/2.0_dp
2026 : new_direction = .FALSE.
2027 : CASE (2)
2028 : new_direction = .TRUE.
2029 : x0 = pos(1) ! 0.0_dp
2030 : c = energy(1)
2031 : b = grad(1)
2032 : x1 = pos(2)
2033 : a = (energy(2) - b*x1 - c)/(x1**2)
2034 : IF (a .LE. 0.0_dp) a = 1.0E-15_dp
2035 : npos = -b/(2.0_dp*a)
2036 : val = a*npos**2 + b*npos + c
2037 : IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
2038 : ! we go to a minimum, but ...
2039 : ! we take a guard against too large steps
2040 : pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
2041 : ELSE ! just take an extended step
2042 : pos(3) = MAXVAL(pos(1:2))*2.0_dp
2043 : END IF
2044 : END SELECT
2045 : CASE (2) ! 3 point line search
2046 : SELECT CASE (line_search_count)
2047 : CASE (1)
2048 : new_direction = .FALSE.
2049 : pos(1) = 0.0_dp
2050 : pos(2) = ds_min*0.8_dp
2051 : CASE (2)
2052 : new_direction = .FALSE.
2053 : IF (energy(2) .GT. energy(1)) THEN
2054 : pos(3) = ds_min*0.7_dp
2055 : ELSE
2056 : pos(3) = ds_min*1.4_dp
2057 : END IF
2058 : CASE (3)
2059 : new_direction = .TRUE.
2060 : xa = pos(1)
2061 : xb = pos(2)
2062 : xc = pos(3)
2063 : fa = energy(1)
2064 : fb = energy(2)
2065 : fc = energy(3)
2066 : nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2067 : denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2068 : IF (ABS(denom) .LE. 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
2069 : npos = xb
2070 : ELSE
2071 : npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2072 : END IF
2073 : val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2074 : (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2075 : (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2076 : IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
2077 : ! we take a guard against too large steps
2078 : pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
2079 : MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
2080 : ELSE ! just take an extended step
2081 : pos(4) = MAXVAL(pos(1:3))*2.0_dp
2082 : END IF
2083 : END SELECT
2084 : CASE (3) ! golden section hunt
2085 204 : new_direction = .FALSE.
2086 204 : IF (line_search_count .EQ. 1) THEN
2087 8 : lsl = 1
2088 8 : lsr = 0
2089 8 : lsm = 1
2090 8 : pos(1) = 0.0_dp
2091 8 : pos(2) = ds_min/gold_sec
2092 : ELSE
2093 196 : IF (line_search_count .EQ. 150) CPABORT("Too many")
2094 196 : IF (lsr .EQ. 0) THEN
2095 14 : IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
2096 8 : lsr = line_search_count
2097 8 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2098 : ELSE
2099 6 : lsl = lsm
2100 6 : lsm = line_search_count
2101 6 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2102 : END IF
2103 : ELSE
2104 182 : IF (pos(line_search_count) .LT. pos(lsm)) THEN
2105 92 : IF (energy(line_search_count) .LT. energy(lsm)) THEN
2106 : lsr = lsm
2107 : lsm = line_search_count
2108 : ELSE
2109 78 : lsl = line_search_count
2110 : END IF
2111 : ELSE
2112 90 : IF (energy(line_search_count) .LT. energy(lsm)) THEN
2113 : lsl = lsm
2114 : lsm = line_search_count
2115 : ELSE
2116 62 : lsr = line_search_count
2117 : END IF
2118 : END IF
2119 182 : IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
2120 86 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2121 : ELSE
2122 96 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2123 : END IF
2124 182 : IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN
2125 8 : new_direction = .TRUE.
2126 : END IF
2127 : END IF ! lsr .eq. 0
2128 : END IF ! first step
2129 : END SELECT
2130 : ! now go to the suggested point
2131 204 : ds_min = pos(line_search_count + 1)
2132 204 : ds = pos(line_search_count + 1) - pos(line_search_count)
2133 206 : CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
2134 : END DO
2135 :
2136 : ! collect rotation matrix
2137 26 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
2138 2 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
2139 2 : CALL cp_fm_to_fm(matrix_T, matrix_R)
2140 2 : CALL rotate_orbitals(matrix_R, vectors)
2141 2 : CALL cp_fm_release(matrix_R)
2142 :
2143 2 : CALL cp_fm_release(matrix_A)
2144 2 : CALL cp_fm_release(matrix_G)
2145 2 : CALL cp_fm_release(matrix_H)
2146 2 : CALL cp_fm_release(matrix_T)
2147 2 : CALL cp_fm_release(matrix_G_search)
2148 2 : CALL cp_fm_release(matrix_G_old)
2149 2 : CALL cp_cfm_release(cmat_A)
2150 2 : CALL cp_cfm_release(cmat_U)
2151 2 : CALL cp_cfm_release(cmat_R)
2152 2 : CALL cp_cfm_release(cmat_t1)
2153 2 : CALL cp_cfm_release(cmat_t2)
2154 2 : CALL cp_cfm_release(cmat_B)
2155 2 : CALL cp_cfm_release(cmat_M)
2156 :
2157 2 : DEALLOCATE (evals, evals_exp, fval, fvald)
2158 :
2159 14 : DO idim = 1, SIZE(c_zij)
2160 156 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
2161 156 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
2162 14 : CALL cp_cfm_release(c_zij(idim))
2163 : END DO
2164 2 : DEALLOCATE (c_zij)
2165 2 : DEALLOCATE (diag_z)
2166 :
2167 2 : CALL timestop(handle)
2168 :
2169 6 : END SUBROUTINE
2170 :
2171 : ! **************************************************************************************************
2172 : !> \brief Parallel algorithm for jacobi rotations
2173 : !> \param weights ...
2174 : !> \param zij ...
2175 : !> \param vectors ...
2176 : !> \param para_env ...
2177 : !> \param max_iter ...
2178 : !> \param eps_localization ...
2179 : !> \param sweeps ...
2180 : !> \param out_each ...
2181 : !> \param target_time ...
2182 : !> \param start_time ...
2183 : !> \param restricted ...
2184 : !> \par History
2185 : !> use allgather for improved performance
2186 : !> \author MI (11.2009)
2187 : ! **************************************************************************************************
2188 388 : SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2189 : sweeps, out_each, target_time, start_time, restricted)
2190 :
2191 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2192 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2193 : TYPE(mp_para_env_type), POINTER :: para_env
2194 : INTEGER, INTENT(IN) :: max_iter
2195 : REAL(KIND=dp), INTENT(IN) :: eps_localization
2196 : INTEGER :: sweeps
2197 : INTEGER, INTENT(IN) :: out_each
2198 : REAL(dp) :: target_time, start_time
2199 : INTEGER :: restricted
2200 :
2201 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para'
2202 :
2203 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2204 : nblock, nblock_max, ns_me, nstate, &
2205 : output_unit
2206 388 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2207 388 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2208 : REAL(KIND=dp) :: xstate
2209 : TYPE(cp_fm_type) :: rmat
2210 388 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2211 :
2212 388 : CALL timeset(routineN, handle)
2213 :
2214 388 : output_unit = cp_logger_get_default_io_unit()
2215 :
2216 388 : NULLIFY (cz_ij_loc)
2217 :
2218 388 : dim2 = SIZE(zij, 2)
2219 :
2220 388 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2221 388 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2222 :
2223 388 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
2224 :
2225 388 : IF (restricted > 0) THEN
2226 0 : IF (output_unit > 0) THEN
2227 0 : WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2228 : END IF
2229 0 : nstate = nstate - restricted
2230 : END IF
2231 :
2232 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2233 388 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2234 1552 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2235 1164 : DO ip = 1, para_env%num_pe
2236 776 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2237 1164 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2238 : END DO
2239 388 : nblock_max = 0
2240 1164 : DO ip = 0, para_env%num_pe - 1
2241 776 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2242 1164 : nblock_max = MAX(nblock_max, nblock)
2243 : END DO
2244 :
2245 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2246 1552 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2247 1164 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2248 2352 : ALLOCATE (cz_ij_loc(dim2))
2249 1576 : DO idim = 1, dim2
2250 3952 : DO ip = 0, para_env%num_pe - 1
2251 2376 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2252 2376 : CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2253 2376 : CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2254 3564 : IF (para_env%mepos == ip) THEN
2255 4731 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2256 5025 : DO i = 1, nblock
2257 37818 : DO j = 1, nstate
2258 36630 : cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2259 : END DO
2260 : END DO
2261 : END IF
2262 : END DO ! ip
2263 : END DO
2264 388 : DEALLOCATE (z_ij_loc_re)
2265 388 : DEALLOCATE (z_ij_loc_im)
2266 :
2267 1552 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2268 :
2269 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2270 : cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2271 388 : target_time=target_time, start_time=start_time)
2272 :
2273 388 : ilow1 = ns_bound(para_env%mepos, 1)
2274 388 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2275 1164 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2276 1164 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2277 1576 : DO idim = 1, dim2
2278 3952 : DO ip = 0, para_env%num_pe - 1
2279 79788 : z_ij_loc_re = 0.0_dp
2280 79788 : z_ij_loc_im = 0.0_dp
2281 2376 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2282 2376 : IF (ip == para_env%mepos) THEN
2283 5025 : ns_me = nblock
2284 5025 : DO i = 1, ns_me
2285 36630 : ii = ilow1 + i - 1
2286 37818 : DO j = 1, nstate
2287 32793 : z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
2288 36630 : z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
2289 : END DO
2290 : END DO
2291 : END IF
2292 2376 : CALL para_env%bcast(z_ij_loc_re, ip)
2293 2376 : CALL para_env%bcast(z_ij_loc_im, ip)
2294 2376 : CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2295 3564 : CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2296 : END DO ! ip
2297 : END DO
2298 :
2299 1164 : DO ip = 0, para_env%num_pe - 1
2300 25692 : z_ij_loc_re = 0.0_dp
2301 776 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2302 776 : IF (ip == para_env%mepos) THEN
2303 1630 : ns_me = nblock
2304 1630 : DO i = 1, ns_me
2305 11772 : ii = ilow1 + i - 1
2306 12160 : DO j = 1, nstate
2307 11772 : z_ij_loc_re(j, i) = rotmat(j, i)
2308 : END DO
2309 : END DO
2310 : END IF
2311 776 : CALL para_env%bcast(z_ij_loc_re, ip)
2312 1164 : CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2313 : END DO
2314 :
2315 388 : DEALLOCATE (z_ij_loc_re)
2316 388 : DEALLOCATE (z_ij_loc_im)
2317 1576 : DO idim = 1, dim2
2318 1576 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2319 : END DO
2320 388 : DEALLOCATE (cz_ij_loc)
2321 :
2322 388 : CALL para_env%sync()
2323 388 : CALL rotate_orbitals(rmat, vectors)
2324 388 : CALL cp_fm_release(rmat)
2325 :
2326 388 : DEALLOCATE (rotmat)
2327 388 : DEALLOCATE (ns_bound)
2328 :
2329 388 : CALL timestop(handle)
2330 :
2331 1164 : END SUBROUTINE jacobi_rot_para
2332 :
2333 : ! **************************************************************************************************
2334 : !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2335 : !> \param weights ...
2336 : !> \param czij ...
2337 : !> \param para_env ...
2338 : !> \param max_iter ...
2339 : !> \param rmat ...
2340 : !> \param tol_out ...
2341 : !> \author Soumya Ghosh (08/21)
2342 : ! **************************************************************************************************
2343 32 : SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2344 :
2345 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2346 : TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2347 : TYPE(mp_para_env_type), POINTER :: para_env
2348 : INTEGER, INTENT(IN) :: max_iter
2349 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
2350 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2351 :
2352 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para_1'
2353 :
2354 32 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2355 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2356 : nblock, nblock_max, ns_me, nstate, &
2357 : sweeps
2358 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2359 32 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2360 : REAL(KIND=dp) :: xstate
2361 32 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2362 :
2363 32 : CALL timeset(routineN, handle)
2364 :
2365 32 : dim2 = SIZE(czij)
2366 :
2367 32 : CALL cp_cfm_set_all(rmat, CMPLX(0._dp, 0._dp, dp), CMPLX(1._dp, 0._dp, dp))
2368 :
2369 32 : CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2370 :
2371 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2372 32 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2373 128 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2374 96 : DO ip = 1, para_env%num_pe
2375 64 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2376 96 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2377 : END DO
2378 32 : nblock_max = 0
2379 96 : DO ip = 0, para_env%num_pe - 1
2380 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2381 96 : nblock_max = MAX(nblock_max, nblock)
2382 : END DO
2383 :
2384 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2385 128 : ALLOCATE (czij_array(nstate, nblock_max))
2386 192 : ALLOCATE (cz_ij_loc(dim2))
2387 128 : DO idim = 1, dim2
2388 320 : DO ip = 0, para_env%num_pe - 1
2389 192 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2390 : ! cfm --> allocatable
2391 192 : CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2392 288 : IF (para_env%mepos == ip) THEN
2393 96 : ns_me = nblock
2394 384 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2395 2160 : DO i = 1, ns_me
2396 90912 : DO j = 1, nstate
2397 90816 : cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2398 : END DO
2399 : END DO
2400 : END IF
2401 : END DO ! ip
2402 : END DO
2403 32 : DEALLOCATE (czij_array)
2404 :
2405 128 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2406 :
2407 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2408 32 : cz_ij_loc, rotmat, 0, tol_out=tol_out)
2409 :
2410 32 : ilow1 = ns_bound(para_env%mepos, 1)
2411 32 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2412 128 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2413 :
2414 96 : DO ip = 0, para_env%num_pe - 1
2415 62016 : z_ij_loc_re = 0.0_dp
2416 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2417 64 : IF (ip == para_env%mepos) THEN
2418 720 : ns_me = nblock
2419 720 : DO i = 1, ns_me
2420 30272 : ii = ilow1 + i - 1
2421 30304 : DO j = 1, nstate
2422 30272 : z_ij_loc_re(j, i) = rotmat(j, i)
2423 : END DO
2424 : END DO
2425 : END IF
2426 64 : CALL para_env%bcast(z_ij_loc_re, ip)
2427 62048 : CALL cp_cfm_set_submatrix(rmat, CMPLX(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2428 : END DO
2429 :
2430 32 : DEALLOCATE (z_ij_loc_re)
2431 128 : DO idim = 1, dim2
2432 128 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2433 : END DO
2434 32 : DEALLOCATE (cz_ij_loc)
2435 :
2436 32 : CALL para_env%sync()
2437 :
2438 32 : DEALLOCATE (rotmat)
2439 32 : DEALLOCATE (ns_bound)
2440 :
2441 32 : CALL timestop(handle)
2442 :
2443 64 : END SUBROUTINE jacobi_rot_para_1
2444 :
2445 : ! **************************************************************************************************
2446 : !> \brief Parallel algorithm for jacobi rotations
2447 : !> \param weights ...
2448 : !> \param para_env ...
2449 : !> \param max_iter ...
2450 : !> \param sweeps ...
2451 : !> \param out_each ...
2452 : !> \param dim2 ...
2453 : !> \param nstate ...
2454 : !> \param nblock_max ...
2455 : !> \param ns_bound ...
2456 : !> \param cz_ij_loc ...
2457 : !> \param rotmat ...
2458 : !> \param output_unit ...
2459 : !> \param tol_out ...
2460 : !> \param eps_localization ...
2461 : !> \param target_time ...
2462 : !> \param start_time ...
2463 : !> \par History
2464 : !> split out to reuse with different input types
2465 : !> \author HF (05.2022)
2466 : ! **************************************************************************************************
2467 1260 : SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2468 420 : ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2469 :
2470 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2471 : TYPE(mp_para_env_type), POINTER :: para_env
2472 : INTEGER, INTENT(IN) :: max_iter
2473 : INTEGER, INTENT(OUT) :: sweeps
2474 : INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2475 : INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2476 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2477 : REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2478 : INTEGER, INTENT(IN) :: output_unit
2479 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2480 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
2481 : REAL(dp), OPTIONAL :: target_time, start_time
2482 :
2483 : COMPLEX(KIND=dp) :: zi, zj
2484 420 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2485 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2486 : INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2487 : ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2488 : iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2489 : ns_recv_from, ns_recv_partner
2490 420 : INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2491 420 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2492 : LOGICAL :: should_stop
2493 420 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2494 420 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2495 : REAL(KIND=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2496 : t2, theta, tolerance, zc, zr
2497 420 : TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2498 420 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2499 :
2500 420 : NULLIFY (zdiag_all, zdiag_me)
2501 420 : NULLIFY (xyz_mix, xyz_mix_ns)
2502 : NULLIFY (mii, mij, mjj)
2503 :
2504 2100 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2505 :
2506 1260 : ALLOCATE (rcount(para_env%num_pe), STAT=istat)
2507 840 : ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
2508 :
2509 420 : tolerance = 1.0e10_dp
2510 420 : sweeps = 0
2511 :
2512 : ! number of processor pairs and number of permutations
2513 420 : npair = (para_env%num_pe + 1)/2
2514 420 : nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
2515 1260 : ALLOCATE (list_pair(2, npair))
2516 :
2517 : ! initialize rotation matrix
2518 87288 : rotmat = 0.0_dp
2519 2350 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2520 1930 : ii = i - ns_bound(para_env%mepos, 1) + 1
2521 2350 : rotmat(i, ii) = 1.0_dp
2522 : END DO
2523 :
2524 2544 : ALLOCATE (xyz_mix(dim2))
2525 2124 : ALLOCATE (xyz_mix_ns(dim2))
2526 2544 : ALLOCATE (zdiag_me(dim2))
2527 2124 : ALLOCATE (zdiag_all(dim2))
2528 :
2529 420 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2530 420 : IF (ns_me /= 0) THEN
2531 2065 : ALLOCATE (c_array_me(nstate, ns_me, dim2))
2532 1676 : DO idim = 1, dim2
2533 5465 : ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2534 : END DO
2535 1652 : ALLOCATE (gmat(nstate, ns_me))
2536 : END IF
2537 :
2538 1704 : DO idim = 1, dim2
2539 3852 : ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2540 7518 : zdiag_me(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
2541 3852 : ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2542 14172 : zdiag_all(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
2543 : END DO
2544 1680 : ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2545 1260 : ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2546 :
2547 : ! buffer for message passing
2548 2100 : ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2549 :
2550 420 : IF (output_unit > 0) THEN
2551 194 : WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2552 194 : WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2553 : END IF
2554 :
2555 87208 : DO lsweep = 1, max_iter + 1
2556 87208 : sweeps = lsweep
2557 87208 : IF (sweeps == max_iter + 1) THEN
2558 156 : IF (output_unit > 0) THEN
2559 62 : WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2560 62 : WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2561 : END IF
2562 : EXIT
2563 : END IF
2564 87052 : t1 = m_walltime()
2565 :
2566 174104 : DO iperm = 1, nperm
2567 :
2568 : ! fix partners for this permutation, and get the number of states
2569 87052 : CALL eberlein(iperm, para_env, list_pair)
2570 87052 : ip_partner = -1
2571 87052 : ns_partner = 0
2572 87052 : DO ipair = 1, npair
2573 87052 : IF (list_pair(1, ipair) == para_env%mepos) THEN
2574 43526 : ip_partner = list_pair(2, ipair)
2575 43526 : EXIT
2576 43526 : ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2577 43526 : ip_partner = list_pair(1, ipair)
2578 43526 : EXIT
2579 : END IF
2580 : END DO
2581 87052 : IF (ip_partner >= 0) THEN
2582 87052 : ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2583 : ELSE
2584 : ns_partner = 0
2585 : END IF
2586 :
2587 : ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2588 87052 : IF (ns_partner*ns_me /= 0) THEN
2589 :
2590 348152 : ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2591 5089598 : rmat_loc = 0.0_dp
2592 698582 : DO i = 1, ns_me + ns_partner
2593 698582 : rmat_loc(i, i) = 1.0_dp
2594 : END DO
2595 :
2596 435190 : ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2597 :
2598 349754 : DO idim = 1, dim2
2599 1050864 : ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2600 1276511 : DO i = 1, ns_me
2601 7890792 : c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2602 : END DO
2603 : END DO
2604 :
2605 : CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2606 87038 : msgout=c_array_partner, source=ip_partner)
2607 :
2608 87038 : n1 = ns_me
2609 87038 : n2 = ns_partner
2610 87038 : ilow1 = ns_bound(para_env%mepos, 1)
2611 87038 : iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2612 87038 : ilow2 = ns_bound(ip_partner, 1)
2613 87038 : iup2 = ns_bound(ip_partner, 1) + n2 - 1
2614 87038 : IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2615 43519 : il1 = 1
2616 : iu1 = n1
2617 43519 : iu1 = n1
2618 43519 : il2 = 1 + n1
2619 43519 : iu2 = n1 + n2
2620 : ELSE
2621 43519 : il1 = 1 + n2
2622 : iu1 = n1 + n2
2623 43519 : iu1 = n1 + n2
2624 43519 : il2 = 1
2625 43519 : iu2 = n2
2626 : END IF
2627 :
2628 349754 : DO idim = 1, dim2
2629 1189473 : DO i = 1, n1
2630 4337826 : xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2631 4479723 : xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2632 : END DO
2633 1276511 : DO i = 1, n2
2634 4337826 : xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2635 4479723 : xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2636 : END DO
2637 : END DO
2638 :
2639 698582 : DO istate = 1, n1 + n2
2640 2588318 : DO jstate = istate + 1, n1 + n2
2641 7664298 : DO idim = 1, dim2
2642 5774562 : mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2643 5774562 : mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2644 7664298 : mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2645 : END DO
2646 1889736 : CALL get_angle(mii, mjj, mij, weights, theta)
2647 1889736 : st = SIN(theta)
2648 1889736 : ct = COS(theta)
2649 7664298 : DO idim = 1, dim2
2650 51288858 : DO i = 1, n1 + n2
2651 45514296 : zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2652 45514296 : zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2653 45514296 : xyz_mix(idim)%c_array(i, istate) = zi
2654 51288858 : xyz_mix(idim)%c_array(i, jstate) = zj
2655 : END DO
2656 53178594 : DO i = 1, n1 + n2
2657 45514296 : zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2658 45514296 : zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2659 45514296 : xyz_mix(idim)%c_array(istate, i) = zi
2660 51288858 : xyz_mix(idim)%c_array(jstate, i) = zj
2661 : END DO
2662 : END DO
2663 :
2664 17237808 : DO i = 1, n1 + n2
2665 14736528 : ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2666 14736528 : rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2667 14736528 : rmat_loc(i, istate) = ri
2668 16626264 : rmat_loc(i, jstate) = rj
2669 : END DO
2670 : END DO
2671 : END DO
2672 :
2673 87038 : k = nblock_max + 1
2674 : CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2675 5089598 : rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2676 :
2677 87038 : IF (ilow1 < ilow2) THEN
2678 : ! no longer compiles in official sdgb:
2679 : !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2680 : ! probably inefficient:
2681 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2682 1467175 : n2, 0.0_dp, gmat(:, :), nstate)
2683 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2684 43519 : n1 + n2, 1.0_dp, gmat(:, :), nstate)
2685 : ELSE
2686 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2687 43519 : rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2688 : ! no longer compiles in official sdgb:
2689 : !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2690 : ! probably inefficient:
2691 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2692 1147207 : n1, 1.0_dp, gmat(:, :), nstate)
2693 : END IF
2694 :
2695 87038 : CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2696 :
2697 349754 : DO idim = 1, dim2
2698 1189473 : DO i = 1, n1
2699 7890792 : xyz_mix_ns(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
2700 : END DO
2701 :
2702 1189473 : DO istate = 1, n1
2703 7890792 : DO jstate = 1, nstate
2704 33316224 : DO i = 1, n2
2705 : xyz_mix_ns(idim)%c_array(jstate, istate) = &
2706 : xyz_mix_ns(idim)%c_array(jstate, istate) + &
2707 32389467 : c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2708 : END DO
2709 : END DO
2710 : END DO
2711 1276511 : DO istate = 1, n1
2712 7890792 : DO jstate = 1, nstate
2713 34155543 : DO i = 1, n1
2714 : xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2715 33228786 : c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2716 : END DO
2717 : END DO
2718 : END DO
2719 : END DO ! idim
2720 :
2721 87038 : DEALLOCATE (c_array_partner)
2722 :
2723 : ELSE ! save my data
2724 56 : DO idim = 1, dim2
2725 77 : DO i = 1, ns_me
2726 105 : xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2727 : END DO
2728 : END DO
2729 : END IF
2730 :
2731 349810 : DO idim = 1, dim2
2732 1276588 : DO i = 1, ns_me
2733 7890876 : cz_ij_loc(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
2734 : END DO
2735 : END DO
2736 :
2737 87052 : IF (ns_partner*ns_me /= 0) THEN
2738 : ! transpose rotation matrix rmat_loc
2739 698582 : DO i = 1, ns_me + ns_partner
2740 2588318 : DO j = i + 1, ns_me + ns_partner
2741 1889736 : ri = rmat_loc(i, j)
2742 1889736 : rmat_loc(i, j) = rmat_loc(j, i)
2743 2501280 : rmat_loc(j, i) = ri
2744 : END DO
2745 : END DO
2746 :
2747 : ! prepare for distribution
2748 392810 : DO i = 1, n1
2749 1510694 : rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2750 : END DO
2751 87038 : ik = nblock_max
2752 392810 : DO i = 1, n2
2753 1470434 : rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2754 : END DO
2755 : ELSE
2756 56 : rmat_send = 0.0_dp
2757 : END IF
2758 :
2759 : ! collect data from all tasks (this takes some significant time)
2760 87052 : CALL para_env%allgather(rmat_send, rmat_recv_all)
2761 :
2762 : ! update blocks everywhere
2763 261156 : DO ip = 0, para_env%num_pe - 1
2764 :
2765 174104 : ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
2766 6456620 : rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2767 :
2768 174104 : ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2769 :
2770 261156 : IF (ns_me /= 0) THEN
2771 174090 : IF (ns_recv_from /= 0) THEN
2772 : !look for the partner of ip_recv_from
2773 174083 : ip_recv_partner = -1
2774 174083 : ns_recv_partner = 0
2775 174083 : DO ipair = 1, npair
2776 174083 : IF (list_pair(1, ipair) == ip_recv_from) THEN
2777 87045 : ip_recv_partner = list_pair(2, ipair)
2778 87045 : EXIT
2779 87038 : ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2780 : ip_recv_partner = list_pair(1, ipair)
2781 : EXIT
2782 : END IF
2783 : END DO
2784 :
2785 174083 : IF (ip_recv_partner >= 0) THEN
2786 174083 : ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2787 : END IF
2788 174083 : IF (ns_recv_partner > 0) THEN
2789 174076 : il1 = ns_bound(para_env%mepos, 1)
2790 174076 : il_recv = ns_bound(ip_recv_from, 1)
2791 174076 : il_recv_partner = ns_bound(ip_recv_partner, 1)
2792 174076 : ik = nblock_max
2793 :
2794 699508 : DO idim = 1, dim2
2795 2378946 : DO i = 1, ns_recv_from
2796 1853514 : ii = il_recv + i - 1
2797 9080265 : DO j = 1, ns_me
2798 33228786 : jj = j
2799 35082300 : DO k = 1, ns_recv_from
2800 26527467 : kk = il_recv + k - 1
2801 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2802 33228786 : rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2803 : END DO
2804 : END DO
2805 : END DO
2806 2553022 : DO i = 1, ns_recv_from
2807 1853514 : ii = il_recv + i - 1
2808 9080265 : DO j = 1, ns_me
2809 32389467 : jj = j
2810 34242981 : DO k = 1, ns_recv_partner
2811 25688148 : kk = il_recv_partner + k - 1
2812 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2813 32389467 : rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2814 : END DO
2815 : END DO
2816 : END DO
2817 : END DO ! idim
2818 : ELSE
2819 7 : il1 = ns_bound(para_env%mepos, 1)
2820 7 : il_recv = ns_bound(ip_recv_from, 1)
2821 28 : DO idim = 1, dim2
2822 49 : DO j = 1, ns_me
2823 42 : jj = j
2824 63 : DO i = 1, ns_recv_from
2825 21 : ii = il_recv + i - 1
2826 42 : cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2827 : END DO
2828 : END DO
2829 : END DO ! idim
2830 : END IF
2831 : END IF
2832 : END IF ! ns_me
2833 : END DO ! ip
2834 :
2835 174104 : IF (ns_partner*ns_me /= 0) THEN
2836 87038 : DEALLOCATE (rmat_loc)
2837 349754 : DO idim = 1, dim2
2838 349754 : DEALLOCATE (xyz_mix(idim)%c_array)
2839 : END DO
2840 : END IF
2841 :
2842 : END DO ! iperm
2843 :
2844 : ! calculate the max gradient
2845 349810 : DO idim = 1, dim2
2846 1189536 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2847 926778 : ii = i - ns_bound(para_env%mepos, 1) + 1
2848 926778 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2849 1189536 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2850 : END DO
2851 788274 : rcount(:) = SIZE(zdiag_me(idim)%c_array)
2852 262758 : rdispl(1) = 0
2853 525516 : DO ip = 2, para_env%num_pe
2854 525516 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2855 : END DO
2856 : ! collect all the diagonal elements in a replicated 1d array
2857 3492664 : CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2858 : END DO
2859 :
2860 87052 : gmax = 0.0_dp
2861 392831 : DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2862 305779 : k = j - ns_bound(para_env%mepos, 1) + 1
2863 1337699 : DO i = 1, j - 1
2864 : ! find the location of the diagonal element (i,i)
2865 1088034 : DO ip = 0, para_env%num_pe - 1
2866 1088034 : IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2867 : ip_has_i = ip
2868 : EXIT
2869 : END IF
2870 : END DO
2871 944868 : ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2872 : ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2873 944868 : jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2874 944868 : grad = 0.0_dp
2875 3832149 : DO idim = 1, dim2
2876 2887281 : zi = zdiag_all(idim)%c_array(ii)
2877 2887281 : zj = zdiag_all(idim)%c_array(jj)
2878 3832149 : grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2879 : END DO
2880 1250647 : gmax = MAX(gmax, ABS(grad))
2881 : END DO
2882 : END DO
2883 :
2884 87052 : CALL para_env%max(gmax)
2885 87052 : tolerance = gmax
2886 87052 : IF (PRESENT(tol_out)) tol_out = tolerance
2887 :
2888 87052 : func = 0.0_dp
2889 392831 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2890 305779 : k = i - ns_bound(para_env%mepos, 1) + 1
2891 1319609 : DO idim = 1, dim2
2892 926778 : zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
2893 926778 : zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
2894 1232557 : func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2895 : END DO
2896 : END DO
2897 87052 : CALL para_env%sum(func)
2898 87052 : t2 = m_walltime()
2899 :
2900 87052 : IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
2901 440 : WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2902 440 : CALL m_flush(output_unit)
2903 : END IF
2904 87052 : IF (PRESENT(eps_localization)) THEN
2905 87020 : IF (tolerance < eps_localization) EXIT
2906 : END IF
2907 87052 : IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2908 86756 : CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2909 86756 : IF (should_stop) EXIT
2910 : END IF
2911 :
2912 : END DO ! lsweep
2913 :
2914 : ! buffer for message passing
2915 420 : DEALLOCATE (rmat_recv_all)
2916 :
2917 420 : DEALLOCATE (rmat_recv)
2918 420 : DEALLOCATE (rmat_send)
2919 420 : IF (ns_me > 0) THEN
2920 413 : DEALLOCATE (c_array_me)
2921 : END IF
2922 1704 : DO idim = 1, dim2
2923 1284 : DEALLOCATE (zdiag_me(idim)%c_array)
2924 1704 : DEALLOCATE (zdiag_all(idim)%c_array)
2925 : END DO
2926 420 : DEALLOCATE (zdiag_me)
2927 420 : DEALLOCATE (zdiag_all)
2928 420 : DEALLOCATE (xyz_mix)
2929 1704 : DO idim = 1, dim2
2930 1704 : IF (ns_me /= 0) THEN
2931 1263 : DEALLOCATE (xyz_mix_ns(idim)%c_array)
2932 : END IF
2933 : END DO
2934 420 : DEALLOCATE (xyz_mix_ns)
2935 420 : IF (ns_me /= 0) THEN
2936 413 : DEALLOCATE (gmat)
2937 : END IF
2938 420 : DEALLOCATE (mii)
2939 420 : DEALLOCATE (mij)
2940 420 : DEALLOCATE (mjj)
2941 420 : DEALLOCATE (list_pair)
2942 :
2943 840 : END SUBROUTINE jacobi_rot_para_core
2944 :
2945 : ! **************************************************************************************************
2946 : !> \brief ...
2947 : !> \param iperm ...
2948 : !> \param para_env ...
2949 : !> \param list_pair ...
2950 : ! **************************************************************************************************
2951 87052 : SUBROUTINE eberlein(iperm, para_env, list_pair)
2952 : INTEGER, INTENT(IN) :: iperm
2953 : TYPE(mp_para_env_type), POINTER :: para_env
2954 : INTEGER, DIMENSION(:, :) :: list_pair
2955 :
2956 : INTEGER :: i, ii, jj, npair
2957 :
2958 87052 : npair = (para_env%num_pe + 1)/2
2959 87052 : IF (iperm == 1) THEN
2960 : !..set up initial ordering
2961 261156 : DO I = 0, para_env%num_pe - 1
2962 174104 : II = ((i + 1) + 1)/2
2963 174104 : JJ = MOD((i + 1) + 1, 2) + 1
2964 261156 : list_pair(JJ, II) = i
2965 : END DO
2966 87052 : IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2967 0 : ELSEIF (MOD(iperm, 2) == 0) THEN
2968 : !..a type shift
2969 0 : jj = list_pair(1, npair)
2970 0 : DO I = npair, 3, -1
2971 0 : list_pair(1, I) = list_pair(1, I - 1)
2972 : END DO
2973 0 : list_pair(1, 2) = list_pair(2, 1)
2974 0 : list_pair(2, 1) = jj
2975 : ELSE
2976 : !..b type shift
2977 0 : jj = list_pair(2, 1)
2978 0 : DO I = 1, npair - 1
2979 0 : list_pair(2, I) = list_pair(2, I + 1)
2980 : END DO
2981 0 : list_pair(2, npair) = jj
2982 : END IF
2983 :
2984 87052 : END SUBROUTINE eberlein
2985 :
2986 : ! **************************************************************************************************
2987 : !> \brief ...
2988 : !> \param vectors ...
2989 : !> \param op_sm_set ...
2990 : !> \param zij_fm_set ...
2991 : ! **************************************************************************************************
2992 488 : SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2993 :
2994 : TYPE(cp_fm_type), INTENT(IN) :: vectors
2995 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2996 : TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2997 :
2998 : CHARACTER(len=*), PARAMETER :: routineN = 'zij_matrix'
2999 :
3000 : INTEGER :: handle, i, j, nao, nmoloc
3001 : TYPE(cp_fm_type) :: opvec
3002 :
3003 488 : CALL timeset(routineN, handle)
3004 :
3005 : ! get rows and cols of the input
3006 488 : CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3007 : ! replicate the input kind of matrix
3008 488 : CALL cp_fm_create(opvec, vectors%matrix_struct)
3009 :
3010 : ! Compute zij here
3011 1988 : DO i = 1, SIZE(zij_fm_set, 2)
3012 4988 : DO j = 1, SIZE(zij_fm_set, 1)
3013 3000 : CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3014 3000 : CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3015 : CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3016 4500 : zij_fm_set(j, i))
3017 : END DO
3018 : END DO
3019 :
3020 488 : CALL cp_fm_release(opvec)
3021 488 : CALL timestop(handle)
3022 :
3023 488 : END SUBROUTINE zij_matrix
3024 :
3025 : ! **************************************************************************************************
3026 : !> \brief ...
3027 : !> \param vectors ...
3028 : ! **************************************************************************************************
3029 36 : SUBROUTINE scdm_qrfact(vectors)
3030 :
3031 : TYPE(cp_fm_type), INTENT(IN) :: vectors
3032 :
3033 : CHARACTER(len=*), PARAMETER :: routineN = 'scdm_qrfact'
3034 :
3035 : INTEGER :: handle, ncolT, nrowT
3036 36 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
3037 : TYPE(cp_fm_struct_type), POINTER :: cstruct
3038 : TYPE(cp_fm_type) :: CTp, Qf, tmp
3039 :
3040 36 : CALL timeset(routineN, handle)
3041 :
3042 : ! Create Transpose of Coefficient Matrix vectors
3043 36 : nrowT = vectors%matrix_struct%ncol_global
3044 36 : ncolT = vectors%matrix_struct%nrow_global
3045 :
3046 : CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
3047 36 : nrow_global=nrowT, ncol_global=ncolT)
3048 36 : CALL cp_fm_create(CTp, cstruct)
3049 36 : CALL cp_fm_struct_release(cstruct)
3050 :
3051 108 : ALLOCATE (tau(nrowT))
3052 :
3053 36 : CALL cp_fm_transpose(vectors, CTp)
3054 :
3055 : ! Get QR decomposition of CTs
3056 36 : CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
3057 :
3058 : ! Construction of Q from the scalapack output
3059 : CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
3060 : context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
3061 36 : ncol_global=CTp%matrix_struct%nrow_global)
3062 36 : CALL cp_fm_create(Qf, cstruct)
3063 36 : CALL cp_fm_struct_release(cstruct)
3064 36 : CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
3065 :
3066 : ! Get Q
3067 36 : CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
3068 :
3069 : ! Transform original coefficient matrix vectors
3070 36 : CALL cp_fm_create(tmp, vectors%matrix_struct)
3071 36 : CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3072 36 : CALL cp_fm_to_fm(vectors, tmp)
3073 36 : CALL parallel_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
3074 :
3075 : ! Cleanup
3076 36 : CALL cp_fm_release(CTp)
3077 36 : CALL cp_fm_release(tmp)
3078 36 : CALL cp_fm_release(Qf)
3079 36 : DEALLOCATE (tau)
3080 :
3081 36 : CALL timestop(handle)
3082 :
3083 144 : END SUBROUTINE scdm_qrfact
3084 :
3085 0 : END MODULE qs_localization_methods
|